1
2
3
4
5
6
7
8
9
10
11#include <linux/mm.h>
12#include <linux/swap.h>
13#include <linux/module.h>
14#include <linux/bit_spinlock.h>
15#include <linux/interrupt.h>
16#include <linux/bitops.h>
17#include <linux/slab.h>
18#include <linux/proc_fs.h>
19#include <linux/seq_file.h>
20#include <linux/kmemcheck.h>
21#include <linux/cpu.h>
22#include <linux/cpuset.h>
23#include <linux/mempolicy.h>
24#include <linux/ctype.h>
25#include <linux/debugobjects.h>
26#include <linux/kallsyms.h>
27#include <linux/memory.h>
28#include <linux/math64.h>
29#include <linux/fault-inject.h>
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/* Every cache flag that turns on some form of SLUB debugging. */
#define SLAB_DEBUG_FLAGS \
	(SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | SLAB_TRACE | \
	 SLAB_DEBUG_FREE)

/*
 * Report whether cache @s has any of SLAB_DEBUG_FLAGS set.
 *
 * Always returns 0 when CONFIG_SLUB_DEBUG is not defined.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#endif
}
120
121
122
123
124
125
126
127
128
129
/* Make sure SLUB_RESILIENCY_TEST is not defined from here on. */
#undef SLUB_RESILIENCY_TEST

/*
 * Numeric limits related to partial slabs.
 * NOTE(review): the code applying these limits is outside this excerpt;
 * presumably they bound s->min_partial - confirm.
 */
#define MIN_PARTIAL	5
#define MAX_PARTIAL	10

/*
 * Debug flags used as the default value of slub_debug (see the
 * CONFIG_SLUB_DEBUG_ON initialiser and setup_slub_debug()).
 */
#define DEBUG_DEFAULT_FLAGS \
	(SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * NOTE(review): no user of this mask is visible in this excerpt; the name
 * suggests the debug options that need extra per-object metadata.
 */
#define DEBUG_METADATA_FLAGS \
	(SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * Flag sets for cache merging decisions.
 * NOTE(review): meaning inferred from the names only; the merge logic is
 * not part of this excerpt.
 */
#define SLUB_NEVER_MERGE \
	(SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | SLAB_TRACE | \
	 SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | SLAB_FAILSLAB)

#define SLUB_MERGE_SAME \
	(SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
	 SLAB_NOTRACK)
164
/*
 * Encoding of struct kmem_cache_order_objects: oo_make() stores the page
 * order above bit OO_SHIFT and the object count in the bits below it.
 */
#define OO_SHIFT		16
#define OO_MASK			((1 << OO_SHIFT) - 1)

/* Upper bound on_freelist() applies to the object count of one slab. */
#define MAX_OBJS_PER_PAGE	65535

/*
 * Flag bits private to this file. __OBJECT_POISON is tested in s->flags by
 * init_object() and check_object().
 * NOTE(review): __SYSFS_ADD_DEFERRED has no user in this excerpt.
 */
#define __OBJECT_POISON		0x80000000UL
#define __SYSFS_ADD_DEFERRED	0x40000000UL
172
173static int kmem_size = sizeof(struct kmem_cache);
174
175#ifdef CONFIG_SMP
176static struct notifier_block slab_notifier;
177#endif
178
179static enum {
180 DOWN,
181 PARTIAL,
182 UP,
183 SYSFS
184} slab_state = DOWN;
185
186
187static DECLARE_RWSEM(slub_lock);
188static LIST_HEAD(slab_caches);
189
190
191
192
/*
 * One allocation/free record kept per object when SLAB_STORE_USER is set.
 * Filled in by set_track() and printed by print_track().
 */
struct track {
	unsigned long addr;	/* caller address passed to set_track() */
	int cpu;		/* smp_processor_id() at record time */
	int pid;		/* current->pid at record time */
	unsigned long when;	/* jiffies at record time */
};

/* Index of a record in the per-object pair of struct track entries. */
enum track_item {
	TRACK_ALLOC = 0,
	TRACK_FREE = 1
};
201
#ifndef CONFIG_SLUB_DEBUG
/* Without CONFIG_SLUB_DEBUG the sysfs hooks are no-ops that report success. */
static inline int sysfs_slab_add(struct kmem_cache *s)
{
	return 0;
}

static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{
	return 0;
}

/* The stub removal hook still releases the cache structure itself. */
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s);
}

#else
/* With CONFIG_SLUB_DEBUG the real implementations are defined elsewhere. */
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#endif
217
218static inline void stat(struct kmem_cache *s, enum stat_item si)
219{
220#ifdef CONFIG_SLUB_STATS
221 __this_cpu_inc(s->cpu_slab->stat[si]);
222#endif
223}
224
225
226
227
228
229int slab_is_available(void)
230{
231 return slab_state >= UP;
232}
233
234static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
235{
236#ifdef CONFIG_NUMA
237 return s->node[node];
238#else
239 return &s->local_node;
240#endif
241}
242
243
244static inline int check_valid_pointer(struct kmem_cache *s,
245 struct page *page, const void *object)
246{
247 void *base;
248
249 if (!object)
250 return 1;
251
252 base = page_address(page);
253 if (object < base || object >= base + page->objects * s->size ||
254 (object - base) % s->size) {
255 return 0;
256 }
257
258 return 1;
259}
260
261static inline void *get_freepointer(struct kmem_cache *s, void *object)
262{
263 return *(void **)(object + s->offset);
264}
265
266static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
267{
268 *(void **)(object + s->offset) = fp;
269}
270
271
/*
 * Visit every object slot of a slab: __p starts at __addr and advances by
 * (__s)->size until __objects slots have been covered. __addr and
 * __objects are re-evaluated on each iteration.
 */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); \
	     __p < (__addr) + (__objects) * (__s)->size; \
	     __p += (__s)->size)

/* Follow a free list from __free until a NULL free pointer ends it. */
#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); \
	     __p; \
	     __p = get_freepointer((__s), __p))
279
280
281static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
282{
283 return (p - addr) / s->size;
284}
285
286static inline struct kmem_cache_order_objects oo_make(int order,
287 unsigned long size)
288{
289 struct kmem_cache_order_objects x = {
290 (order << OO_SHIFT) + (PAGE_SIZE << order) / size
291 };
292
293 return x;
294}
295
296static inline int oo_order(struct kmem_cache_order_objects x)
297{
298 return x.x >> OO_SHIFT;
299}
300
301static inline int oo_objects(struct kmem_cache_order_objects x)
302{
303 return x.x & OO_MASK;
304}
305
306#ifdef CONFIG_SLUB_DEBUG
307
308
309
/*
 * Debug flags OR-ed into new caches by kmem_cache_flags(). Starts out as
 * DEBUG_DEFAULT_FLAGS when CONFIG_SLUB_DEBUG_ON is set, otherwise as 0;
 * setup_slub_debug() may overwrite it.
 */
#ifndef CONFIG_SLUB_DEBUG_ON
static int slub_debug;
#else
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#endif

/* Cache-name prefix that slub_debug is limited to, or NULL for all caches. */
static char *slub_debug_slabs;

/* Set to 1 by the 'o' option of setup_slub_debug(). */
static int disable_higher_order_debug;
318
319
320
321
322static void print_section(char *text, u8 *addr, unsigned int length)
323{
324 int i, offset;
325 int newline = 1;
326 char ascii[17];
327
328 ascii[16] = 0;
329
330 for (i = 0; i < length; i++) {
331 if (newline) {
332 printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
333 newline = 0;
334 }
335 printk(KERN_CONT " %02x", addr[i]);
336 offset = i % 16;
337 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
338 if (offset == 15) {
339 printk(KERN_CONT " %s\n", ascii);
340 newline = 1;
341 }
342 }
343 if (!newline) {
344 i %= 16;
345 while (i < 16) {
346 printk(KERN_CONT " ");
347 ascii[i] = ' ';
348 i++;
349 }
350 printk(KERN_CONT " %s\n", ascii);
351 }
352}
353
354static struct track *get_track(struct kmem_cache *s, void *object,
355 enum track_item alloc)
356{
357 struct track *p;
358
359 if (s->offset)
360 p = object + s->offset + sizeof(void *);
361 else
362 p = object + s->inuse;
363
364 return p + alloc;
365}
366
367static void set_track(struct kmem_cache *s, void *object,
368 enum track_item alloc, unsigned long addr)
369{
370 struct track *p = get_track(s, object, alloc);
371
372 if (addr) {
373 p->addr = addr;
374 p->cpu = smp_processor_id();
375 p->pid = current->pid;
376 p->when = jiffies;
377 } else
378 memset(p, 0, sizeof(struct track));
379}
380
381static void init_tracking(struct kmem_cache *s, void *object)
382{
383 if (!(s->flags & SLAB_STORE_USER))
384 return;
385
386 set_track(s, object, TRACK_FREE, 0UL);
387 set_track(s, object, TRACK_ALLOC, 0UL);
388}
389
390static void print_track(const char *s, struct track *t)
391{
392 if (!t->addr)
393 return;
394
395 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
396 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
397}
398
399static void print_tracking(struct kmem_cache *s, void *object)
400{
401 if (!(s->flags & SLAB_STORE_USER))
402 return;
403
404 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
405 print_track("Freed", get_track(s, object, TRACK_FREE));
406}
407
408static void print_page_info(struct page *page)
409{
410 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
411 page, page->objects, page->inuse, page->freelist, page->flags);
412
413}
414
415static void slab_bug(struct kmem_cache *s, char *fmt, ...)
416{
417 va_list args;
418 char buf[100];
419
420 va_start(args, fmt);
421 vsnprintf(buf, sizeof(buf), fmt, args);
422 va_end(args);
423 printk(KERN_ERR "========================================"
424 "=====================================\n");
425 printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
426 printk(KERN_ERR "----------------------------------------"
427 "-------------------------------------\n\n");
428}
429
430static void slab_fix(struct kmem_cache *s, char *fmt, ...)
431{
432 va_list args;
433 char buf[100];
434
435 va_start(args, fmt);
436 vsnprintf(buf, sizeof(buf), fmt, args);
437 va_end(args);
438 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
439}
440
441static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
442{
443 unsigned int off;
444 u8 *addr = page_address(page);
445
446 print_tracking(s, p);
447
448 print_page_info(page);
449
450 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
451 p, p - addr, get_freepointer(s, p));
452
453 if (p > addr + 16)
454 print_section("Bytes b4", p - 16, 16);
455
456 print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
457
458 if (s->flags & SLAB_RED_ZONE)
459 print_section("Redzone", p + s->objsize,
460 s->inuse - s->objsize);
461
462 if (s->offset)
463 off = s->offset + sizeof(void *);
464 else
465 off = s->inuse;
466
467 if (s->flags & SLAB_STORE_USER)
468 off += 2 * sizeof(struct track);
469
470 if (off != s->size)
471
472 print_section("Padding", p + off, s->size - off);
473
474 dump_stack();
475}
476
477static void object_err(struct kmem_cache *s, struct page *page,
478 u8 *object, char *reason)
479{
480 slab_bug(s, "%s", reason);
481 print_trailer(s, page, object);
482}
483
484static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
485{
486 va_list args;
487 char buf[100];
488
489 va_start(args, fmt);
490 vsnprintf(buf, sizeof(buf), fmt, args);
491 va_end(args);
492 slab_bug(s, "%s", buf);
493 print_page_info(page);
494 dump_stack();
495}
496
497static void init_object(struct kmem_cache *s, void *object, int active)
498{
499 u8 *p = object;
500
501 if (s->flags & __OBJECT_POISON) {
502 memset(p, POISON_FREE, s->objsize - 1);
503 p[s->objsize - 1] = POISON_END;
504 }
505
506 if (s->flags & SLAB_RED_ZONE)
507 memset(p + s->objsize,
508 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
509 s->inuse - s->objsize);
510}
511
512static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
513{
514 while (bytes) {
515 if (*start != (u8)value)
516 return start;
517 start++;
518 bytes--;
519 }
520 return NULL;
521}
522
523static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
524 void *from, void *to)
525{
526 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
527 memset(from, data, to - from);
528}
529
530static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
531 u8 *object, char *what,
532 u8 *start, unsigned int value, unsigned int bytes)
533{
534 u8 *fault;
535 u8 *end;
536
537 fault = check_bytes(start, value, bytes);
538 if (!fault)
539 return 1;
540
541 end = start + bytes;
542 while (end > fault && end[-1] == value)
543 end--;
544
545 slab_bug(s, "%s overwritten", what);
546 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
547 fault, end - 1, fault[0], value);
548 print_trailer(s, page, object);
549
550 restore_bytes(s, what, value, fault, end);
551 return 0;
552}
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
593{
594 unsigned long off = s->inuse;
595
596 if (s->offset)
597
598 off += sizeof(void *);
599
600 if (s->flags & SLAB_STORE_USER)
601
602 off += 2 * sizeof(struct track);
603
604 if (s->size == off)
605 return 1;
606
607 return check_bytes_and_report(s, page, p, "Object padding",
608 p + off, POISON_INUSE, s->size - off);
609}
610
611
612static int slab_pad_check(struct kmem_cache *s, struct page *page)
613{
614 u8 *start;
615 u8 *fault;
616 u8 *end;
617 int length;
618 int remainder;
619
620 if (!(s->flags & SLAB_POISON))
621 return 1;
622
623 start = page_address(page);
624 length = (PAGE_SIZE << compound_order(page));
625 end = start + length;
626 remainder = length % s->size;
627 if (!remainder)
628 return 1;
629
630 fault = check_bytes(end - remainder, POISON_INUSE, remainder);
631 if (!fault)
632 return 1;
633 while (end > fault && end[-1] == POISON_INUSE)
634 end--;
635
636 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
637 print_section("Padding", end - remainder, remainder);
638
639 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
640 return 0;
641}
642
643static int check_object(struct kmem_cache *s, struct page *page,
644 void *object, int active)
645{
646 u8 *p = object;
647 u8 *endobject = object + s->objsize;
648
649 if (s->flags & SLAB_RED_ZONE) {
650 unsigned int red =
651 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;
652
653 if (!check_bytes_and_report(s, page, object, "Redzone",
654 endobject, red, s->inuse - s->objsize))
655 return 0;
656 } else {
657 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
658 check_bytes_and_report(s, page, p, "Alignment padding",
659 endobject, POISON_INUSE, s->inuse - s->objsize);
660 }
661 }
662
663 if (s->flags & SLAB_POISON) {
664 if (!active && (s->flags & __OBJECT_POISON) &&
665 (!check_bytes_and_report(s, page, p, "Poison", p,
666 POISON_FREE, s->objsize - 1) ||
667 !check_bytes_and_report(s, page, p, "Poison",
668 p + s->objsize - 1, POISON_END, 1)))
669 return 0;
670
671
672
673 check_pad_bytes(s, page, p);
674 }
675
676 if (!s->offset && active)
677
678
679
680
681 return 1;
682
683
684 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
685 object_err(s, page, p, "Freepointer corrupt");
686
687
688
689
690
691 set_freepointer(s, p, NULL);
692 return 0;
693 }
694 return 1;
695}
696
697static int check_slab(struct kmem_cache *s, struct page *page)
698{
699 int maxobj;
700
701 VM_BUG_ON(!irqs_disabled());
702
703 if (!PageSlab(page)) {
704 slab_err(s, page, "Not a valid slab page");
705 return 0;
706 }
707
708 maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
709 if (page->objects > maxobj) {
710 slab_err(s, page, "objects %u > max %u",
711 s->name, page->objects, maxobj);
712 return 0;
713 }
714 if (page->inuse > page->objects) {
715 slab_err(s, page, "inuse %u > max %u",
716 s->name, page->inuse, page->objects);
717 return 0;
718 }
719
720 slab_pad_check(s, page);
721 return 1;
722}
723
724
725
726
727
728static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
729{
730 int nr = 0;
731 void *fp = page->freelist;
732 void *object = NULL;
733 unsigned long max_objects;
734
735 while (fp && nr <= page->objects) {
736 if (fp == search)
737 return 1;
738 if (!check_valid_pointer(s, page, fp)) {
739 if (object) {
740 object_err(s, page, object,
741 "Freechain corrupt");
742 set_freepointer(s, object, NULL);
743 break;
744 } else {
745 slab_err(s, page, "Freepointer corrupt");
746 page->freelist = NULL;
747 page->inuse = page->objects;
748 slab_fix(s, "Freelist cleared");
749 return 0;
750 }
751 break;
752 }
753 object = fp;
754 fp = get_freepointer(s, object);
755 nr++;
756 }
757
758 max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
759 if (max_objects > MAX_OBJS_PER_PAGE)
760 max_objects = MAX_OBJS_PER_PAGE;
761
762 if (page->objects != max_objects) {
763 slab_err(s, page, "Wrong number of objects. Found %d but "
764 "should be %d", page->objects, max_objects);
765 page->objects = max_objects;
766 slab_fix(s, "Number of objects adjusted.");
767 }
768 if (page->inuse != page->objects - nr) {
769 slab_err(s, page, "Wrong object count. Counter is %d but "
770 "counted were %d", page->inuse, page->objects - nr);
771 page->inuse = page->objects - nr;
772 slab_fix(s, "Object count adjusted.");
773 }
774 return search == NULL;
775}
776
777static void trace(struct kmem_cache *s, struct page *page, void *object,
778 int alloc)
779{
780 if (s->flags & SLAB_TRACE) {
781 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
782 s->name,
783 alloc ? "alloc" : "free",
784 object, page->inuse,
785 page->freelist);
786
787 if (!alloc)
788 print_section("Object", (void *)object, s->objsize);
789
790 dump_stack();
791 }
792}
793
794
795
796
797static void add_full(struct kmem_cache_node *n, struct page *page)
798{
799 spin_lock(&n->list_lock);
800 list_add(&page->lru, &n->full);
801 spin_unlock(&n->list_lock);
802}
803
804static void remove_full(struct kmem_cache *s, struct page *page)
805{
806 struct kmem_cache_node *n;
807
808 if (!(s->flags & SLAB_STORE_USER))
809 return;
810
811 n = get_node(s, page_to_nid(page));
812
813 spin_lock(&n->list_lock);
814 list_del(&page->lru);
815 spin_unlock(&n->list_lock);
816}
817
818
819static inline unsigned long slabs_node(struct kmem_cache *s, int node)
820{
821 struct kmem_cache_node *n = get_node(s, node);
822
823 return atomic_long_read(&n->nr_slabs);
824}
825
826static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
827{
828 return atomic_long_read(&n->nr_slabs);
829}
830
831static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
832{
833 struct kmem_cache_node *n = get_node(s, node);
834
835
836
837
838
839
840
841 if (!NUMA_BUILD || n) {
842 atomic_long_inc(&n->nr_slabs);
843 atomic_long_add(objects, &n->total_objects);
844 }
845}
846static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
847{
848 struct kmem_cache_node *n = get_node(s, node);
849
850 atomic_long_dec(&n->nr_slabs);
851 atomic_long_sub(objects, &n->total_objects);
852}
853
854
855static void setup_object_debug(struct kmem_cache *s, struct page *page,
856 void *object)
857{
858 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
859 return;
860
861 init_object(s, object, 0);
862 init_tracking(s, object);
863}
864
865static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
866 void *object, unsigned long addr)
867{
868 if (!check_slab(s, page))
869 goto bad;
870
871 if (!on_freelist(s, page, object)) {
872 object_err(s, page, object, "Object already allocated");
873 goto bad;
874 }
875
876 if (!check_valid_pointer(s, page, object)) {
877 object_err(s, page, object, "Freelist Pointer check fails");
878 goto bad;
879 }
880
881 if (!check_object(s, page, object, 0))
882 goto bad;
883
884
885 if (s->flags & SLAB_STORE_USER)
886 set_track(s, object, TRACK_ALLOC, addr);
887 trace(s, page, object, 1);
888 init_object(s, object, 1);
889 return 1;
890
891bad:
892 if (PageSlab(page)) {
893
894
895
896
897
898 slab_fix(s, "Marking all objects used");
899 page->inuse = page->objects;
900 page->freelist = NULL;
901 }
902 return 0;
903}
904
905static int free_debug_processing(struct kmem_cache *s, struct page *page,
906 void *object, unsigned long addr)
907{
908 if (!check_slab(s, page))
909 goto fail;
910
911 if (!check_valid_pointer(s, page, object)) {
912 slab_err(s, page, "Invalid object pointer 0x%p", object);
913 goto fail;
914 }
915
916 if (on_freelist(s, page, object)) {
917 object_err(s, page, object, "Object already free");
918 goto fail;
919 }
920
921 if (!check_object(s, page, object, 1))
922 return 0;
923
924 if (unlikely(s != page->slab)) {
925 if (!PageSlab(page)) {
926 slab_err(s, page, "Attempt to free object(0x%p) "
927 "outside of slab", object);
928 } else if (!page->slab) {
929 printk(KERN_ERR
930 "SLUB <none>: no slab for object 0x%p.\n",
931 object);
932 dump_stack();
933 } else
934 object_err(s, page, object,
935 "page slab pointer corrupt.");
936 goto fail;
937 }
938
939
940 if (!PageSlubFrozen(page) && !page->freelist)
941 remove_full(s, page);
942 if (s->flags & SLAB_STORE_USER)
943 set_track(s, object, TRACK_FREE, addr);
944 trace(s, page, object, 0);
945 init_object(s, object, 0);
946 return 1;
947
948fail:
949 slab_fix(s, "Object at 0x%p not freed", object);
950 return 0;
951}
952
953static int __init setup_slub_debug(char *str)
954{
955 slub_debug = DEBUG_DEFAULT_FLAGS;
956 if (*str++ != '=' || !*str)
957
958
959
960 goto out;
961
962 if (*str == ',')
963
964
965
966
967 goto check_slabs;
968
969 if (tolower(*str) == 'o') {
970
971
972
973
974 disable_higher_order_debug = 1;
975 goto out;
976 }
977
978 slub_debug = 0;
979 if (*str == '-')
980
981
982
983 goto out;
984
985
986
987
988 for (; *str && *str != ','; str++) {
989 switch (tolower(*str)) {
990 case 'f':
991 slub_debug |= SLAB_DEBUG_FREE;
992 break;
993 case 'z':
994 slub_debug |= SLAB_RED_ZONE;
995 break;
996 case 'p':
997 slub_debug |= SLAB_POISON;
998 break;
999 case 'u':
1000 slub_debug |= SLAB_STORE_USER;
1001 break;
1002 case 't':
1003 slub_debug |= SLAB_TRACE;
1004 break;
1005 case 'a':
1006 slub_debug |= SLAB_FAILSLAB;
1007 break;
1008 default:
1009 printk(KERN_ERR "slub_debug option '%c' "
1010 "unknown. skipped\n", *str);
1011 }
1012 }
1013
1014check_slabs:
1015 if (*str == ',')
1016 slub_debug_slabs = str + 1;
1017out:
1018 return 1;
1019}
1020
1021__setup("slub_debug", setup_slub_debug);
1022
1023static unsigned long kmem_cache_flags(unsigned long objsize,
1024 unsigned long flags, const char *name,
1025 void (*ctor)(void *))
1026{
1027
1028
1029
1030 if (slub_debug && (!slub_debug_slabs ||
1031 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1032 flags |= slub_debug;
1033
1034 return flags;
1035}
1036#else
/*
 * Stand-ins for the debug helpers, used on the #else side of the
 * CONFIG_SLUB_DEBUG conditional. Checks report success, processing hooks
 * report 0, and the counters read as 0.
 */
static inline void setup_object_debug(struct kmem_cache *s,
			struct page *page, void *object)
{
}

static inline int alloc_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr)
{
	return 0;
}

static inline int free_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr)
{
	return 0;
}

static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
{
	return 1;
}

static inline int check_object(struct kmem_cache *s, struct page *page,
			void *object, int active)
{
	return 1;
}

static inline void add_full(struct kmem_cache_node *n, struct page *page)
{
}

/* Flags pass through unchanged when no debug flags can be injected. */
static inline unsigned long kmem_cache_flags(unsigned long objsize,
	unsigned long flags, const char *name,
	void (*ctor)(void *))
{
	return flags;
}

/* Constant replacements for the debug variables. */
#define slub_debug 0

#define disable_higher_order_debug 0

static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{
	return 0;
}

static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{
	return 0;
}

static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects)
{
}

static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects)
{
}
1069#endif
1070
1071
1072
1073
1074static inline struct page *alloc_slab_page(gfp_t flags, int node,
1075 struct kmem_cache_order_objects oo)
1076{
1077 int order = oo_order(oo);
1078
1079 flags |= __GFP_NOTRACK;
1080
1081 if (node == NUMA_NO_NODE)
1082 return alloc_pages(flags, order);
1083 else
1084 return alloc_pages_exact_node(node, flags, order);
1085}
1086
1087static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1088{
1089 struct page *page;
1090 struct kmem_cache_order_objects oo = s->oo;
1091 gfp_t alloc_gfp;
1092
1093 flags |= s->allocflags;
1094
1095
1096
1097
1098
1099 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1100
1101 page = alloc_slab_page(alloc_gfp, node, oo);
1102 if (unlikely(!page)) {
1103 oo = s->min;
1104
1105
1106
1107
1108 page = alloc_slab_page(flags, node, oo);
1109 if (!page)
1110 return NULL;
1111
1112 stat(s, ORDER_FALLBACK);
1113 }
1114
1115 if (kmemcheck_enabled
1116 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1117 int pages = 1 << oo_order(oo);
1118
1119 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1120
1121
1122
1123
1124
1125 if (s->ctor)
1126 kmemcheck_mark_uninitialized_pages(page, pages);
1127 else
1128 kmemcheck_mark_unallocated_pages(page, pages);
1129 }
1130
1131 page->objects = oo_objects(oo);
1132 mod_zone_page_state(page_zone(page),
1133 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1134 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1135 1 << oo_order(oo));
1136
1137 return page;
1138}
1139
1140static void setup_object(struct kmem_cache *s, struct page *page,
1141 void *object)
1142{
1143 setup_object_debug(s, page, object);
1144 if (unlikely(s->ctor))
1145 s->ctor(object);
1146}
1147
1148static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1149{
1150 struct page *page;
1151 void *start;
1152 void *last;
1153 void *p;
1154
1155 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1156
1157 page = allocate_slab(s,
1158 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1159 if (!page)
1160 goto out;
1161
1162 inc_slabs_node(s, page_to_nid(page), page->objects);
1163 page->slab = s;
1164 page->flags |= 1 << PG_slab;
1165
1166 start = page_address(page);
1167
1168 if (unlikely(s->flags & SLAB_POISON))
1169 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1170
1171 last = start;
1172 for_each_object(p, s, start, page->objects) {
1173 setup_object(s, page, last);
1174 set_freepointer(s, last, p);
1175 last = p;
1176 }
1177 setup_object(s, page, last);
1178 set_freepointer(s, last, NULL);
1179
1180 page->freelist = start;
1181 page->inuse = 0;
1182out:
1183 return page;
1184}
1185
1186static void __free_slab(struct kmem_cache *s, struct page *page)
1187{
1188 int order = compound_order(page);
1189 int pages = 1 << order;
1190
1191 if (kmem_cache_debug(s)) {
1192 void *p;
1193
1194 slab_pad_check(s, page);
1195 for_each_object(p, s, page_address(page),
1196 page->objects)
1197 check_object(s, page, p, 0);
1198 }
1199
1200 kmemcheck_free_shadow(page, compound_order(page));
1201
1202 mod_zone_page_state(page_zone(page),
1203 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1204 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1205 -pages);
1206
1207 __ClearPageSlab(page);
1208 reset_page_mapcount(page);
1209 if (current->reclaim_state)
1210 current->reclaim_state->reclaimed_slab += pages;
1211 __free_pages(page, order);
1212}
1213
1214static void rcu_free_slab(struct rcu_head *h)
1215{
1216 struct page *page;
1217
1218 page = container_of((struct list_head *)h, struct page, lru);
1219 __free_slab(page->slab, page);
1220}
1221
1222static void free_slab(struct kmem_cache *s, struct page *page)
1223{
1224 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1225
1226
1227
1228 struct rcu_head *head = (void *)&page->lru;
1229
1230 call_rcu(head, rcu_free_slab);
1231 } else
1232 __free_slab(s, page);
1233}
1234
1235static void discard_slab(struct kmem_cache *s, struct page *page)
1236{
1237 dec_slabs_node(s, page_to_nid(page), page->objects);
1238 free_slab(s, page);
1239}
1240
1241
1242
1243
1244static __always_inline void slab_lock(struct page *page)
1245{
1246 bit_spin_lock(PG_locked, &page->flags);
1247}
1248
1249static __always_inline void slab_unlock(struct page *page)
1250{
1251 __bit_spin_unlock(PG_locked, &page->flags);
1252}
1253
1254static __always_inline int slab_trylock(struct page *page)
1255{
1256 int rc = 1;
1257
1258 rc = bit_spin_trylock(PG_locked, &page->flags);
1259 return rc;
1260}
1261
1262
1263
1264
1265static void add_partial(struct kmem_cache_node *n,
1266 struct page *page, int tail)
1267{
1268 spin_lock(&n->list_lock);
1269 n->nr_partial++;
1270 if (tail)
1271 list_add_tail(&page->lru, &n->partial);
1272 else
1273 list_add(&page->lru, &n->partial);
1274 spin_unlock(&n->list_lock);
1275}
1276
1277static void remove_partial(struct kmem_cache *s, struct page *page)
1278{
1279 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1280
1281 spin_lock(&n->list_lock);
1282 list_del(&page->lru);
1283 n->nr_partial--;
1284 spin_unlock(&n->list_lock);
1285}
1286
1287
1288
1289
1290
1291
1292static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
1293 struct page *page)
1294{
1295 if (slab_trylock(page)) {
1296 list_del(&page->lru);
1297 n->nr_partial--;
1298 __SetPageSlubFrozen(page);
1299 return 1;
1300 }
1301 return 0;
1302}
1303
1304
1305
1306
1307static struct page *get_partial_node(struct kmem_cache_node *n)
1308{
1309 struct page *page;
1310
1311
1312
1313
1314
1315
1316
1317 if (!n || !n->nr_partial)
1318 return NULL;
1319
1320 spin_lock(&n->list_lock);
1321 list_for_each_entry(page, &n->partial, lru)
1322 if (lock_and_freeze_slab(n, page))
1323 goto out;
1324 page = NULL;
1325out:
1326 spin_unlock(&n->list_lock);
1327 return page;
1328}
1329
1330
1331
1332
1333static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1334{
1335#ifdef CONFIG_NUMA
1336 struct zonelist *zonelist;
1337 struct zoneref *z;
1338 struct zone *zone;
1339 enum zone_type high_zoneidx = gfp_zone(flags);
1340 struct page *page;
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360 if (!s->remote_node_defrag_ratio ||
1361 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1362 return NULL;
1363
1364 get_mems_allowed();
1365 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1366 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1367 struct kmem_cache_node *n;
1368
1369 n = get_node(s, zone_to_nid(zone));
1370
1371 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1372 n->nr_partial > s->min_partial) {
1373 page = get_partial_node(n);
1374 if (page) {
1375 put_mems_allowed();
1376 return page;
1377 }
1378 }
1379 }
1380 put_mems_allowed();
1381#endif
1382 return NULL;
1383}
1384
1385
1386
1387
1388static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1389{
1390 struct page *page;
1391 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1392
1393 page = get_partial_node(get_node(s, searchnode));
1394 if (page || node != -1)
1395 return page;
1396
1397 return get_any_partial(s, flags);
1398}
1399
1400
1401
1402
1403
1404
1405
1406
1407static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1408{
1409 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1410
1411 __ClearPageSlubFrozen(page);
1412 if (page->inuse) {
1413
1414 if (page->freelist) {
1415 add_partial(n, page, tail);
1416 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1417 } else {
1418 stat(s, DEACTIVATE_FULL);
1419 if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
1420 add_full(n, page);
1421 }
1422 slab_unlock(page);
1423 } else {
1424 stat(s, DEACTIVATE_EMPTY);
1425 if (n->nr_partial < s->min_partial) {
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436 add_partial(n, page, 1);
1437 slab_unlock(page);
1438 } else {
1439 slab_unlock(page);
1440 stat(s, FREE_SLAB);
1441 discard_slab(s, page);
1442 }
1443 }
1444}
1445
1446
1447
1448
1449static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1450{
1451 struct page *page = c->page;
1452 int tail = 1;
1453
1454 if (page->freelist)
1455 stat(s, DEACTIVATE_REMOTE_FREES);
1456
1457
1458
1459
1460
1461 while (unlikely(c->freelist)) {
1462 void **object;
1463
1464 tail = 0;
1465
1466
1467 object = c->freelist;
1468 c->freelist = get_freepointer(s, c->freelist);
1469
1470
1471 set_freepointer(s, object, page->freelist);
1472 page->freelist = object;
1473 page->inuse--;
1474 }
1475 c->page = NULL;
1476 unfreeze_slab(s, page, tail);
1477}
1478
1479static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1480{
1481 stat(s, CPUSLAB_FLUSH);
1482 slab_lock(c->page);
1483 deactivate_slab(s, c);
1484}
1485
1486
1487
1488
1489
1490
1491static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1492{
1493 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
1494
1495 if (likely(c && c->page))
1496 flush_slab(s, c);
1497}
1498
/*
 * Per-cpu callback used by flush_all(): @d is the kmem_cache whose slab
 * for the executing cpu is flushed.
 */
static void flush_cpu_slab(void *d)
{
	__flush_cpu_slab((struct kmem_cache *)d, smp_processor_id());
}
1505
1506static void flush_all(struct kmem_cache *s)
1507{
1508 on_each_cpu(flush_cpu_slab, s, 1);
1509}
1510
1511
1512
1513
1514
/*
 * Does the cpu slab of @c satisfy a request for @node?
 * Always true on non-NUMA builds and when @node is NUMA_NO_NODE.
 */
static inline int node_match(struct kmem_cache_cpu *c, int node)
{
#ifdef CONFIG_NUMA
	if (node == NUMA_NO_NODE)
		return 1;
	if (c->node != node)
		return 0;
#endif
	return 1;
}
1523
1524static int count_free(struct page *page)
1525{
1526 return page->objects - page->inuse;
1527}
1528
1529static unsigned long count_partial(struct kmem_cache_node *n,
1530 int (*get_count)(struct page *))
1531{
1532 unsigned long flags;
1533 unsigned long x = 0;
1534 struct page *page;
1535
1536 spin_lock_irqsave(&n->list_lock, flags);
1537 list_for_each_entry(page, &n->partial, lru)
1538 x += get_count(page);
1539 spin_unlock_irqrestore(&n->list_lock, flags);
1540 return x;
1541}
1542
/*
 * Total number of objects tracked for node @n.  The counter only exists
 * with CONFIG_SLUB_DEBUG; otherwise 0 is reported.
 */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return atomic_long_read(&n->total_objects);
#endif
}
1551
1552static noinline void
1553slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
1554{
1555 int node;
1556
1557 printk(KERN_WARNING
1558 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1559 nid, gfpflags);
1560 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
1561 "default order: %d, min order: %d\n", s->name, s->objsize,
1562 s->size, oo_order(s->oo), oo_order(s->min));
1563
1564 if (oo_order(s->min) > get_order(s->objsize))
1565 printk(KERN_WARNING " %s debugging increased min order, use "
1566 "slub_debug=O to disable.\n", s->name);
1567
1568 for_each_online_node(node) {
1569 struct kmem_cache_node *n = get_node(s, node);
1570 unsigned long nr_slabs;
1571 unsigned long nr_objs;
1572 unsigned long nr_free;
1573
1574 if (!n)
1575 continue;
1576
1577 nr_free = count_partial(n, count_free);
1578 nr_slabs = node_nr_slabs(n);
1579 nr_objs = node_nr_objs(n);
1580
1581 printk(KERN_WARNING
1582 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
1583 node, nr_slabs, nr_objs, nr_free);
1584 }
1585}
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
1606 unsigned long addr, struct kmem_cache_cpu *c)
1607{
1608 void **object;
1609 struct page *new;
1610
1611
1612 gfpflags &= ~__GFP_ZERO;
1613
1614 if (!c->page)
1615 goto new_slab;
1616
1617 slab_lock(c->page);
1618 if (unlikely(!node_match(c, node)))
1619 goto another_slab;
1620
1621 stat(s, ALLOC_REFILL);
1622
1623load_freelist:
1624 object = c->page->freelist;
1625 if (unlikely(!object))
1626 goto another_slab;
1627 if (kmem_cache_debug(s))
1628 goto debug;
1629
1630 c->freelist = get_freepointer(s, object);
1631 c->page->inuse = c->page->objects;
1632 c->page->freelist = NULL;
1633 c->node = page_to_nid(c->page);
1634unlock_out:
1635 slab_unlock(c->page);
1636 stat(s, ALLOC_SLOWPATH);
1637 return object;
1638
1639another_slab:
1640 deactivate_slab(s, c);
1641
1642new_slab:
1643 new = get_partial(s, gfpflags, node);
1644 if (new) {
1645 c->page = new;
1646 stat(s, ALLOC_FROM_PARTIAL);
1647 goto load_freelist;
1648 }
1649
1650 if (gfpflags & __GFP_WAIT)
1651 local_irq_enable();
1652
1653 new = new_slab(s, gfpflags, node);
1654
1655 if (gfpflags & __GFP_WAIT)
1656 local_irq_disable();
1657
1658 if (new) {
1659 c = __this_cpu_ptr(s->cpu_slab);
1660 stat(s, ALLOC_SLAB);
1661 if (c->page)
1662 flush_slab(s, c);
1663 slab_lock(new);
1664 __SetPageSlubFrozen(new);
1665 c->page = new;
1666 goto load_freelist;
1667 }
1668 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
1669 slab_out_of_memory(s, gfpflags, node);
1670 return NULL;
1671debug:
1672 if (!alloc_debug_processing(s, c->page, object, addr))
1673 goto another_slab;
1674
1675 c->page->inuse++;
1676 c->page->freelist = get_freepointer(s, object);
1677 c->node = -1;
1678 goto unlock_out;
1679}
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691static __always_inline void *slab_alloc(struct kmem_cache *s,
1692 gfp_t gfpflags, int node, unsigned long addr)
1693{
1694 void **object;
1695 struct kmem_cache_cpu *c;
1696 unsigned long flags;
1697
1698 gfpflags &= gfp_allowed_mask;
1699
1700 lockdep_trace_alloc(gfpflags);
1701 might_sleep_if(gfpflags & __GFP_WAIT);
1702
1703 if (should_failslab(s->objsize, gfpflags, s->flags))
1704 return NULL;
1705
1706 local_irq_save(flags);
1707 c = __this_cpu_ptr(s->cpu_slab);
1708 object = c->freelist;
1709 if (unlikely(!object || !node_match(c, node)))
1710
1711 object = __slab_alloc(s, gfpflags, node, addr, c);
1712
1713 else {
1714 c->freelist = get_freepointer(s, object);
1715 stat(s, ALLOC_FASTPATH);
1716 }
1717 local_irq_restore(flags);
1718
1719 if (unlikely(gfpflags & __GFP_ZERO) && object)
1720 memset(object, 0, s->objsize);
1721
1722 kmemcheck_slab_alloc(s, gfpflags, object, s->objsize);
1723 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags);
1724
1725 return object;
1726}
1727
1728void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1729{
1730 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
1731
1732 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
1733
1734 return ret;
1735}
1736EXPORT_SYMBOL(kmem_cache_alloc);
1737
#ifdef CONFIG_TRACING
/* Same allocation as kmem_cache_alloc(), but no trace event is emitted. */
void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
{
	void *obj = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);

	return obj;
}
EXPORT_SYMBOL(kmem_cache_alloc_notrace);
#endif
1745
#ifdef CONFIG_NUMA
/*
 * Allocate one object from cache @s with node constraint @node and emit
 * the kmem_cache_alloc_node trace event.  Returns the object or NULL.
 */
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
	void *obj;

	obj = slab_alloc(s, gfpflags, node, _RET_IP_);
	trace_kmem_cache_alloc_node(_RET_IP_, obj,
				    s->objsize, s->size, gfpflags, node);

	return obj;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif
1758
#ifdef CONFIG_TRACING
/* Node-constrained allocation from @s that emits no trace event. */
void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
				    gfp_t gfpflags,
				    int node)
{
	void *obj = slab_alloc(s, gfpflags, node, _RET_IP_);

	return obj;
}
EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
#endif
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777static void __slab_free(struct kmem_cache *s, struct page *page,
1778 void *x, unsigned long addr)
1779{
1780 void *prior;
1781 void **object = (void *)x;
1782
1783 stat(s, FREE_SLOWPATH);
1784 slab_lock(page);
1785
1786 if (kmem_cache_debug(s))
1787 goto debug;
1788
1789checks_ok:
1790 prior = page->freelist;
1791 set_freepointer(s, object, prior);
1792 page->freelist = object;
1793 page->inuse--;
1794
1795 if (unlikely(PageSlubFrozen(page))) {
1796 stat(s, FREE_FROZEN);
1797 goto out_unlock;
1798 }
1799
1800 if (unlikely(!page->inuse))
1801 goto slab_empty;
1802
1803
1804
1805
1806
1807 if (unlikely(!prior)) {
1808 add_partial(get_node(s, page_to_nid(page)), page, 1);
1809 stat(s, FREE_ADD_PARTIAL);
1810 }
1811
1812out_unlock:
1813 slab_unlock(page);
1814 return;
1815
1816slab_empty:
1817 if (prior) {
1818
1819
1820
1821 remove_partial(s, page);
1822 stat(s, FREE_REMOVE_PARTIAL);
1823 }
1824 slab_unlock(page);
1825 stat(s, FREE_SLAB);
1826 discard_slab(s, page);
1827 return;
1828
1829debug:
1830 if (!free_debug_processing(s, page, x, addr))
1831 goto out_unlock;
1832 goto checks_ok;
1833}
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846static __always_inline void slab_free(struct kmem_cache *s,
1847 struct page *page, void *x, unsigned long addr)
1848{
1849 void **object = (void *)x;
1850 struct kmem_cache_cpu *c;
1851 unsigned long flags;
1852
1853 kmemleak_free_recursive(x, s->flags);
1854 local_irq_save(flags);
1855 c = __this_cpu_ptr(s->cpu_slab);
1856 kmemcheck_slab_free(s, object, s->objsize);
1857 debug_check_no_locks_freed(object, s->objsize);
1858 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1859 debug_check_no_obj_freed(object, s->objsize);
1860 if (likely(page == c->page && c->node >= 0)) {
1861 set_freepointer(s, object, c->freelist);
1862 c->freelist = object;
1863 stat(s, FREE_FASTPATH);
1864 } else
1865 __slab_free(s, page, x, addr);
1866
1867 local_irq_restore(flags);
1868}
1869
1870void kmem_cache_free(struct kmem_cache *s, void *x)
1871{
1872 struct page *page;
1873
1874 page = virt_to_head_page(x);
1875
1876 slab_free(s, page, x, _RET_IP_);
1877
1878 trace_kmem_cache_free(_RET_IP_, x);
1879}
1880EXPORT_SYMBOL(kmem_cache_free);
1881
1882
1883static struct page *get_object_page(const void *x)
1884{
1885 struct page *page = virt_to_head_page(x);
1886
1887 if (!PageSlab(page))
1888 return NULL;
1889
1890 return page;
1891}
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
/*
 * Tunables for the slab order calculation; each one can be set from the
 * kernel command line by the matching setup_slub_*() handler.
 *
 * slub_min_order:   lower bound for the order chosen by slab_order().
 * slub_max_order:   upper bound tried first by calculate_order(); the boot
 *                   option handler limits it to MAX_ORDER - 1.
 * slub_min_objects: desired minimum number of objects per slab; 0 makes
 *                   calculate_order() derive a value from nr_cpu_ids.
 */
static int slub_min_order;
static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static int slub_min_objects;

/*
 * Non-zero disables cache merging: slab_unmergeable() and find_mergeable()
 * both bail out when it is set.  Set by the "slub_nomerge" boot option.
 */
static int slub_nomerge;
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947static inline int slab_order(int size, int min_objects,
1948 int max_order, int fract_leftover)
1949{
1950 int order;
1951 int rem;
1952 int min_order = slub_min_order;
1953
1954 if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
1955 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
1956
1957 for (order = max(min_order,
1958 fls(min_objects * size - 1) - PAGE_SHIFT);
1959 order <= max_order; order++) {
1960
1961 unsigned long slab_size = PAGE_SIZE << order;
1962
1963 if (slab_size < min_objects * size)
1964 continue;
1965
1966 rem = slab_size % size;
1967
1968 if (rem <= slab_size / fract_leftover)
1969 break;
1970
1971 }
1972
1973 return order;
1974}
1975
1976static inline int calculate_order(int size)
1977{
1978 int order;
1979 int min_objects;
1980 int fraction;
1981 int max_objects;
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991 min_objects = slub_min_objects;
1992 if (!min_objects)
1993 min_objects = 4 * (fls(nr_cpu_ids) + 1);
1994 max_objects = (PAGE_SIZE << slub_max_order)/size;
1995 min_objects = min(min_objects, max_objects);
1996
1997 while (min_objects > 1) {
1998 fraction = 16;
1999 while (fraction >= 4) {
2000 order = slab_order(size, min_objects,
2001 slub_max_order, fraction);
2002 if (order <= slub_max_order)
2003 return order;
2004 fraction /= 2;
2005 }
2006 min_objects--;
2007 }
2008
2009
2010
2011
2012
2013 order = slab_order(size, 1, slub_max_order, 1);
2014 if (order <= slub_max_order)
2015 return order;
2016
2017
2018
2019
2020 order = slab_order(size, 1, MAX_ORDER, 1);
2021 if (order < MAX_ORDER)
2022 return order;
2023 return -ENOSYS;
2024}
2025
2026
2027
2028
2029static unsigned long calculate_alignment(unsigned long flags,
2030 unsigned long align, unsigned long size)
2031{
2032
2033
2034
2035
2036
2037
2038
2039 if (flags & SLAB_HWCACHE_ALIGN) {
2040 unsigned long ralign = cache_line_size();
2041 while (size <= ralign / 2)
2042 ralign /= 2;
2043 align = max(align, ralign);
2044 }
2045
2046 if (align < ARCH_SLAB_MINALIGN)
2047 align = ARCH_SLAB_MINALIGN;
2048
2049 return ALIGN(align, sizeof(void *));
2050}
2051
2052static void
2053init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2054{
2055 n->nr_partial = 0;
2056 spin_lock_init(&n->list_lock);
2057 INIT_LIST_HEAD(&n->partial);
2058#ifdef CONFIG_SLUB_DEBUG
2059 atomic_long_set(&n->nr_slabs, 0);
2060 atomic_long_set(&n->total_objects, 0);
2061 INIT_LIST_HEAD(&n->full);
2062#endif
2063}
2064
2065static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]);
2066
2067static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2068{
2069 if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
2070
2071
2072
2073
2074 s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches);
2075 else
2076 s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
2077
2078 if (!s->cpu_slab)
2079 return 0;
2080
2081 return 1;
2082}
2083
2084#ifdef CONFIG_NUMA
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
2095{
2096 struct page *page;
2097 struct kmem_cache_node *n;
2098 unsigned long flags;
2099
2100 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
2101
2102 page = new_slab(kmalloc_caches, gfpflags, node);
2103
2104 BUG_ON(!page);
2105 if (page_to_nid(page) != node) {
2106 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2107 "node %d\n", node);
2108 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2109 "in order to be able to continue\n");
2110 }
2111
2112 n = page->freelist;
2113 BUG_ON(!n);
2114 page->freelist = get_freepointer(kmalloc_caches, n);
2115 page->inuse++;
2116 kmalloc_caches->node[node] = n;
2117#ifdef CONFIG_SLUB_DEBUG
2118 init_object(kmalloc_caches, n, 1);
2119 init_tracking(kmalloc_caches, n);
2120#endif
2121 init_kmem_cache_node(n, kmalloc_caches);
2122 inc_slabs_node(kmalloc_caches, node, page->objects);
2123
2124
2125
2126
2127
2128
2129 local_irq_save(flags);
2130 add_partial(n, page, 0);
2131 local_irq_restore(flags);
2132}
2133
2134static void free_kmem_cache_nodes(struct kmem_cache *s)
2135{
2136 int node;
2137
2138 for_each_node_state(node, N_NORMAL_MEMORY) {
2139 struct kmem_cache_node *n = s->node[node];
2140 if (n)
2141 kmem_cache_free(kmalloc_caches, n);
2142 s->node[node] = NULL;
2143 }
2144}
2145
2146static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2147{
2148 int node;
2149
2150 for_each_node_state(node, N_NORMAL_MEMORY) {
2151 struct kmem_cache_node *n;
2152
2153 if (slab_state == DOWN) {
2154 early_kmem_cache_node_alloc(gfpflags, node);
2155 continue;
2156 }
2157 n = kmem_cache_alloc_node(kmalloc_caches,
2158 gfpflags, node);
2159
2160 if (!n) {
2161 free_kmem_cache_nodes(s);
2162 return 0;
2163 }
2164
2165 s->node[node] = n;
2166 init_kmem_cache_node(n, s);
2167 }
2168 return 1;
2169}
2170#else
/* Non-NUMA build: the node structure is embedded in @s, nothing to free. */
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
	/* intentionally empty */
}
2174
2175static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2176{
2177 init_kmem_cache_node(&s->local_node, s);
2178 return 1;
2179}
2180#endif
2181
2182static void set_min_partial(struct kmem_cache *s, unsigned long min)
2183{
2184 if (min < MIN_PARTIAL)
2185 min = MIN_PARTIAL;
2186 else if (min > MAX_PARTIAL)
2187 min = MAX_PARTIAL;
2188 s->min_partial = min;
2189}
2190
2191
2192
2193
2194
2195static int calculate_sizes(struct kmem_cache *s, int forced_order)
2196{
2197 unsigned long flags = s->flags;
2198 unsigned long size = s->objsize;
2199 unsigned long align = s->align;
2200 int order;
2201
2202
2203
2204
2205
2206
2207 size = ALIGN(size, sizeof(void *));
2208
2209#ifdef CONFIG_SLUB_DEBUG
2210
2211
2212
2213
2214
2215 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2216 !s->ctor)
2217 s->flags |= __OBJECT_POISON;
2218 else
2219 s->flags &= ~__OBJECT_POISON;
2220
2221
2222
2223
2224
2225
2226
2227 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2228 size += sizeof(void *);
2229#endif
2230
2231
2232
2233
2234
2235 s->inuse = size;
2236
2237 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2238 s->ctor)) {
2239
2240
2241
2242
2243
2244
2245
2246
2247 s->offset = size;
2248 size += sizeof(void *);
2249 }
2250
2251#ifdef CONFIG_SLUB_DEBUG
2252 if (flags & SLAB_STORE_USER)
2253
2254
2255
2256
2257 size += 2 * sizeof(struct track);
2258
2259 if (flags & SLAB_RED_ZONE)
2260
2261
2262
2263
2264
2265
2266
2267 size += sizeof(void *);
2268#endif
2269
2270
2271
2272
2273
2274
2275 align = calculate_alignment(flags, align, s->objsize);
2276 s->align = align;
2277
2278
2279
2280
2281
2282
2283 size = ALIGN(size, align);
2284 s->size = size;
2285 if (forced_order >= 0)
2286 order = forced_order;
2287 else
2288 order = calculate_order(size);
2289
2290 if (order < 0)
2291 return 0;
2292
2293 s->allocflags = 0;
2294 if (order)
2295 s->allocflags |= __GFP_COMP;
2296
2297 if (s->flags & SLAB_CACHE_DMA)
2298 s->allocflags |= SLUB_DMA;
2299
2300 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2301 s->allocflags |= __GFP_RECLAIMABLE;
2302
2303
2304
2305
2306 s->oo = oo_make(order, size);
2307 s->min = oo_make(get_order(size), size);
2308 if (oo_objects(s->oo) > oo_objects(s->max))
2309 s->max = s->oo;
2310
2311 return !!oo_objects(s->oo);
2312
2313}
2314
2315static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2316 const char *name, size_t size,
2317 size_t align, unsigned long flags,
2318 void (*ctor)(void *))
2319{
2320 memset(s, 0, kmem_size);
2321 s->name = name;
2322 s->ctor = ctor;
2323 s->objsize = size;
2324 s->align = align;
2325 s->flags = kmem_cache_flags(size, flags, name, ctor);
2326
2327 if (!calculate_sizes(s, -1))
2328 goto error;
2329 if (disable_higher_order_debug) {
2330
2331
2332
2333
2334 if (get_order(s->size) > get_order(s->objsize)) {
2335 s->flags &= ~DEBUG_METADATA_FLAGS;
2336 s->offset = 0;
2337 if (!calculate_sizes(s, -1))
2338 goto error;
2339 }
2340 }
2341
2342
2343
2344
2345
2346 set_min_partial(s, ilog2(s->size));
2347 s->refcount = 1;
2348#ifdef CONFIG_NUMA
2349 s->remote_node_defrag_ratio = 1000;
2350#endif
2351 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
2352 goto error;
2353
2354 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
2355 return 1;
2356
2357 free_kmem_cache_nodes(s);
2358error:
2359 if (flags & SLAB_PANIC)
2360 panic("Cannot create slab %s size=%lu realsize=%u "
2361 "order=%u offset=%u flags=%lx\n",
2362 s->name, (unsigned long)size, s->size, oo_order(s->oo),
2363 s->offset, flags);
2364 return 0;
2365}
2366
2367
2368
2369
2370int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2371{
2372 struct page *page;
2373
2374 if (!kern_ptr_validate(object, s->size))
2375 return 0;
2376
2377 page = get_object_page(object);
2378
2379 if (!page || s != page->slab)
2380
2381 return 0;
2382
2383 if (!check_valid_pointer(s, page, object))
2384 return 0;
2385
2386
2387
2388
2389
2390
2391
2392 return 1;
2393}
2394EXPORT_SYMBOL(kmem_ptr_validate);
2395
2396
2397
2398
2399unsigned int kmem_cache_size(struct kmem_cache *s)
2400{
2401 return s->objsize;
2402}
2403EXPORT_SYMBOL(kmem_cache_size);
2404
2405const char *kmem_cache_name(struct kmem_cache *s)
2406{
2407 return s->name;
2408}
2409EXPORT_SYMBOL(kmem_cache_name);
2410
/*
 * Debug aid: report every object of @page that is not on the page
 * freelist.  @text is printed as the heading via slab_err().
 *
 * A bitmap with one bit per object is allocated with GFP_ATOMIC; if that
 * fails nothing is reported.  Compiled out without CONFIG_SLUB_DEBUG.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	void *p;
	long *free_map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
				 GFP_ATOMIC);

	if (!free_map)
		return;

	slab_err(s, page, "%s", text);
	slab_lock(page);

	/* Mark everything that is on the freelist ... */
	for_each_free_object(p, s, page->freelist)
		set_bit(slab_index(p, s, base), free_map);

	/* ... and report every object that is not marked. */
	for_each_object(p, s, base, page->objects) {
		if (test_bit(slab_index(p, s, base), free_map))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
						p, p - base);
		print_tracking(s, p);
	}

	slab_unlock(page);
	kfree(free_map);
#endif
}
2439
2440
2441
2442
2443static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2444{
2445 unsigned long flags;
2446 struct page *page, *h;
2447
2448 spin_lock_irqsave(&n->list_lock, flags);
2449 list_for_each_entry_safe(page, h, &n->partial, lru) {
2450 if (!page->inuse) {
2451 list_del(&page->lru);
2452 discard_slab(s, page);
2453 n->nr_partial--;
2454 } else {
2455 list_slab_objects(s, page,
2456 "Objects remaining on kmem_cache_close()");
2457 }
2458 }
2459 spin_unlock_irqrestore(&n->list_lock, flags);
2460}
2461
2462
2463
2464
2465static inline int kmem_cache_close(struct kmem_cache *s)
2466{
2467 int node;
2468
2469 flush_all(s);
2470 free_percpu(s->cpu_slab);
2471
2472 for_each_node_state(node, N_NORMAL_MEMORY) {
2473 struct kmem_cache_node *n = get_node(s, node);
2474
2475 free_partial(s, n);
2476 if (n->nr_partial || slabs_node(s, node))
2477 return 1;
2478 }
2479 free_kmem_cache_nodes(s);
2480 return 0;
2481}
2482
2483
2484
2485
2486
2487void kmem_cache_destroy(struct kmem_cache *s)
2488{
2489 down_write(&slub_lock);
2490 s->refcount--;
2491 if (!s->refcount) {
2492 list_del(&s->list);
2493 if (kmem_cache_close(s)) {
2494 printk(KERN_ERR "SLUB %s: %s called for cache that "
2495 "still has objects.\n", s->name, __func__);
2496 dump_stack();
2497 }
2498 if (s->flags & SLAB_DESTROY_BY_RCU)
2499 rcu_barrier();
2500 sysfs_slab_remove(s);
2501 }
2502 up_write(&slub_lock);
2503}
2504EXPORT_SYMBOL(kmem_cache_destroy);
2505
2506
2507
2508
2509
/*
 * Array of the general purpose (kmalloc) caches.  get_slab() indexes it by
 * size class; on NUMA, slot 0 holds the cache used for kmem_cache_node
 * structures (see kmem_cache_init()).
 */
struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);
2512
2513static int __init setup_slub_min_order(char *str)
2514{
2515 get_option(&str, &slub_min_order);
2516
2517 return 1;
2518}
2519
2520__setup("slub_min_order=", setup_slub_min_order);
2521
2522static int __init setup_slub_max_order(char *str)
2523{
2524 get_option(&str, &slub_max_order);
2525 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
2526
2527 return 1;
2528}
2529
2530__setup("slub_max_order=", setup_slub_max_order);
2531
2532static int __init setup_slub_min_objects(char *str)
2533{
2534 get_option(&str, &slub_min_objects);
2535
2536 return 1;
2537}
2538
2539__setup("slub_min_objects=", setup_slub_min_objects);
2540
2541static int __init setup_slub_nomerge(char *str)
2542{
2543 slub_nomerge = 1;
2544 return 1;
2545}
2546
2547__setup("slub_nomerge", setup_slub_nomerge);
2548
2549static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
2550 const char *name, int size, gfp_t gfp_flags)
2551{
2552 unsigned int flags = 0;
2553
2554 if (gfp_flags & SLUB_DMA)
2555 flags = SLAB_CACHE_DMA;
2556
2557
2558
2559
2560
2561 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
2562 flags, NULL))
2563 goto panic;
2564
2565 list_add(&s->list, &slab_caches);
2566
2567 if (sysfs_slab_add(s))
2568 goto panic;
2569 return s;
2570
2571panic:
2572 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
2573}
2574
2575#ifdef CONFIG_ZONE_DMA
2576static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
2577
2578static void sysfs_add_func(struct work_struct *w)
2579{
2580 struct kmem_cache *s;
2581
2582 down_write(&slub_lock);
2583 list_for_each_entry(s, &slab_caches, list) {
2584 if (s->flags & __SYSFS_ADD_DEFERRED) {
2585 s->flags &= ~__SYSFS_ADD_DEFERRED;
2586 sysfs_slab_add(s);
2587 }
2588 }
2589 up_write(&slub_lock);
2590}
2591
2592static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
2593
2594static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
2595{
2596 struct kmem_cache *s;
2597 char *text;
2598 size_t realsize;
2599 unsigned long slabflags;
2600 int i;
2601
2602 s = kmalloc_caches_dma[index];
2603 if (s)
2604 return s;
2605
2606
2607 if (flags & __GFP_WAIT)
2608 down_write(&slub_lock);
2609 else {
2610 if (!down_write_trylock(&slub_lock))
2611 goto out;
2612 }
2613
2614 if (kmalloc_caches_dma[index])
2615 goto unlock_out;
2616
2617 realsize = kmalloc_caches[index].objsize;
2618 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
2619 (unsigned int)realsize);
2620
2621 s = NULL;
2622 for (i = 0; i < KMALLOC_CACHES; i++)
2623 if (!kmalloc_caches[i].size)
2624 break;
2625
2626 BUG_ON(i >= KMALLOC_CACHES);
2627 s = kmalloc_caches + i;
2628
2629
2630
2631
2632
2633
2634
2635 slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
2636 if (slab_state >= SYSFS)
2637 slabflags |= __SYSFS_ADD_DEFERRED;
2638
2639 if (!text || !kmem_cache_open(s, flags, text,
2640 realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
2641 s->size = 0;
2642 kfree(text);
2643 goto unlock_out;
2644 }
2645
2646 list_add(&s->list, &slab_caches);
2647 kmalloc_caches_dma[index] = s;
2648
2649 if (slab_state >= SYSFS)
2650 schedule_work(&sysfs_add_work);
2651
2652unlock_out:
2653 up_write(&slub_lock);
2654out:
2655 return kmalloc_caches_dma[index];
2656}
2657#endif
2658
2659
2660
2661
2662
2663
2664
/*
 * Lookup table for request sizes of 1..192 bytes.  Entry k, selected via
 * size_index_elem(), covers sizes 8*k+1 .. 8*(k+1) and holds the index
 * into kmalloc_caches[] used by get_slab().  Indexes 1 and 2 are the
 * 96 and 192 byte caches; every other index i is the 2^i byte cache.
 * kmem_cache_init() patches entries when KMALLOC_MIN_SIZE requires it.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};
2691
/*
 * Map a request size in bytes to its slot in size_index[]: sizes 1..8 map
 * to slot 0, 9..16 to slot 1, and so on.
 */
static inline int size_index_elem(size_t bytes)
{
	size_t zero_based = bytes - 1;

	return zero_based >> 3;
}
2696
2697static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2698{
2699 int index;
2700
2701 if (size <= 192) {
2702 if (!size)
2703 return ZERO_SIZE_PTR;
2704
2705 index = size_index[size_index_elem(size)];
2706 } else
2707 index = fls(size - 1);
2708
2709#ifdef CONFIG_ZONE_DMA
2710 if (unlikely((flags & SLUB_DMA)))
2711 return dma_kmalloc_cache(index, flags);
2712
2713#endif
2714 return &kmalloc_caches[index];
2715}
2716
2717void *__kmalloc(size_t size, gfp_t flags)
2718{
2719 struct kmem_cache *s;
2720 void *ret;
2721
2722 if (unlikely(size > SLUB_MAX_SIZE))
2723 return kmalloc_large(size, flags);
2724
2725 s = get_slab(size, flags);
2726
2727 if (unlikely(ZERO_OR_NULL_PTR(s)))
2728 return s;
2729
2730 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
2731
2732 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
2733
2734 return ret;
2735}
2736EXPORT_SYMBOL(__kmalloc);
2737
2738static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2739{
2740 struct page *page;
2741 void *ptr = NULL;
2742
2743 flags |= __GFP_COMP | __GFP_NOTRACK;
2744 page = alloc_pages_node(node, flags, get_order(size));
2745 if (page)
2746 ptr = page_address(page);
2747
2748 kmemleak_alloc(ptr, size, 1, flags);
2749 return ptr;
2750}
2751
#ifdef CONFIG_NUMA
/*
 * Node-aware variant of __kmalloc().
 *
 * Requests above SLUB_MAX_SIZE are served by kmalloc_large_node().
 * Otherwise the matching kmalloc cache is used, with @node as the node
 * constraint.  A NULL or ZERO_SIZE_PTR cache lookup result is returned
 * unchanged.  Both paths that allocate emit the kmalloc_node trace event.
 */
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	struct kmem_cache *cache;
	void *obj;

	if (unlikely(size > SLUB_MAX_SIZE)) {
		obj = kmalloc_large_node(size, flags, node);

		trace_kmalloc_node(_RET_IP_, obj,
				   size, PAGE_SIZE << get_order(size),
				   flags, node);

		return obj;
	}

	cache = get_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(cache)))
		return cache;

	obj = slab_alloc(cache, flags, node, _RET_IP_);
	trace_kmalloc_node(_RET_IP_, obj, size, cache->size, flags, node);

	return obj;
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
2781
2782size_t ksize(const void *object)
2783{
2784 struct page *page;
2785 struct kmem_cache *s;
2786
2787 if (unlikely(object == ZERO_SIZE_PTR))
2788 return 0;
2789
2790 page = virt_to_head_page(object);
2791
2792 if (unlikely(!PageSlab(page))) {
2793 WARN_ON(!PageCompound(page));
2794 return PAGE_SIZE << compound_order(page);
2795 }
2796 s = page->slab;
2797
2798#ifdef CONFIG_SLUB_DEBUG
2799
2800
2801
2802
2803 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2804 return s->objsize;
2805
2806#endif
2807
2808
2809
2810
2811
2812 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2813 return s->inuse;
2814
2815
2816
2817 return s->size;
2818}
2819EXPORT_SYMBOL(ksize);
2820
2821void kfree(const void *x)
2822{
2823 struct page *page;
2824 void *object = (void *)x;
2825
2826 trace_kfree(_RET_IP_, x);
2827
2828 if (unlikely(ZERO_OR_NULL_PTR(x)))
2829 return;
2830
2831 page = virt_to_head_page(x);
2832 if (unlikely(!PageSlab(page))) {
2833 BUG_ON(!PageCompound(page));
2834 kmemleak_free(x);
2835 put_page(page);
2836 return;
2837 }
2838 slab_free(page->slab, page, object, _RET_IP_);
2839}
2840EXPORT_SYMBOL(kfree);
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852int kmem_cache_shrink(struct kmem_cache *s)
2853{
2854 int node;
2855 int i;
2856 struct kmem_cache_node *n;
2857 struct page *page;
2858 struct page *t;
2859 int objects = oo_objects(s->max);
2860 struct list_head *slabs_by_inuse =
2861 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
2862 unsigned long flags;
2863
2864 if (!slabs_by_inuse)
2865 return -ENOMEM;
2866
2867 flush_all(s);
2868 for_each_node_state(node, N_NORMAL_MEMORY) {
2869 n = get_node(s, node);
2870
2871 if (!n->nr_partial)
2872 continue;
2873
2874 for (i = 0; i < objects; i++)
2875 INIT_LIST_HEAD(slabs_by_inuse + i);
2876
2877 spin_lock_irqsave(&n->list_lock, flags);
2878
2879
2880
2881
2882
2883
2884
2885 list_for_each_entry_safe(page, t, &n->partial, lru) {
2886 if (!page->inuse && slab_trylock(page)) {
2887
2888
2889
2890
2891
2892 list_del(&page->lru);
2893 n->nr_partial--;
2894 slab_unlock(page);
2895 discard_slab(s, page);
2896 } else {
2897 list_move(&page->lru,
2898 slabs_by_inuse + page->inuse);
2899 }
2900 }
2901
2902
2903
2904
2905
2906 for (i = objects - 1; i >= 0; i--)
2907 list_splice(slabs_by_inuse + i, n->partial.prev);
2908
2909 spin_unlock_irqrestore(&n->list_lock, flags);
2910 }
2911
2912 kfree(slabs_by_inuse);
2913 return 0;
2914}
2915EXPORT_SYMBOL(kmem_cache_shrink);
2916
2917#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
2918static int slab_mem_going_offline_callback(void *arg)
2919{
2920 struct kmem_cache *s;
2921
2922 down_read(&slub_lock);
2923 list_for_each_entry(s, &slab_caches, list)
2924 kmem_cache_shrink(s);
2925 up_read(&slub_lock);
2926
2927 return 0;
2928}
2929
2930static void slab_mem_offline_callback(void *arg)
2931{
2932 struct kmem_cache_node *n;
2933 struct kmem_cache *s;
2934 struct memory_notify *marg = arg;
2935 int offline_node;
2936
2937 offline_node = marg->status_change_nid;
2938
2939
2940
2941
2942
2943 if (offline_node < 0)
2944 return;
2945
2946 down_read(&slub_lock);
2947 list_for_each_entry(s, &slab_caches, list) {
2948 n = get_node(s, offline_node);
2949 if (n) {
2950
2951
2952
2953
2954
2955
2956 BUG_ON(slabs_node(s, offline_node));
2957
2958 s->node[offline_node] = NULL;
2959 kmem_cache_free(kmalloc_caches, n);
2960 }
2961 }
2962 up_read(&slub_lock);
2963}
2964
2965static int slab_mem_going_online_callback(void *arg)
2966{
2967 struct kmem_cache_node *n;
2968 struct kmem_cache *s;
2969 struct memory_notify *marg = arg;
2970 int nid = marg->status_change_nid;
2971 int ret = 0;
2972
2973
2974
2975
2976
2977 if (nid < 0)
2978 return 0;
2979
2980
2981
2982
2983
2984
2985 down_read(&slub_lock);
2986 list_for_each_entry(s, &slab_caches, list) {
2987
2988
2989
2990
2991
2992 n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL);
2993 if (!n) {
2994 ret = -ENOMEM;
2995 goto out;
2996 }
2997 init_kmem_cache_node(n, s);
2998 s->node[nid] = n;
2999 }
3000out:
3001 up_read(&slub_lock);
3002 return ret;
3003}
3004
3005static int slab_memory_callback(struct notifier_block *self,
3006 unsigned long action, void *arg)
3007{
3008 int ret = 0;
3009
3010 switch (action) {
3011 case MEM_GOING_ONLINE:
3012 ret = slab_mem_going_online_callback(arg);
3013 break;
3014 case MEM_GOING_OFFLINE:
3015 ret = slab_mem_going_offline_callback(arg);
3016 break;
3017 case MEM_OFFLINE:
3018 case MEM_CANCEL_ONLINE:
3019 slab_mem_offline_callback(arg);
3020 break;
3021 case MEM_ONLINE:
3022 case MEM_CANCEL_OFFLINE:
3023 break;
3024 }
3025 if (ret)
3026 ret = notifier_from_errno(ret);
3027 else
3028 ret = NOTIFY_OK;
3029 return ret;
3030}
3031
3032#endif
3033
3034
3035
3036
3037
/*
 * kmem_cache_init - boot-time setup of the statically sized kmalloc caches.
 *
 * Creates the kmalloc_caches[] entries, patches the size_index[] lookup
 * table for configurations with a larger KMALLOC_MIN_SIZE, gives the
 * power-of-two caches their final names, registers the hotplug notifiers
 * and computes kmem_size. slab_state is advanced to PARTIAL and then to UP
 * along the way.
 */
void __init kmem_cache_init(void)
{
 int i;
 int caches = 0;

#ifdef CONFIG_NUMA
 /*
  * Slot 0 holds the cache for struct kmem_cache_node objects. It is
  * created before slab_state is raised to PARTIAL below. The negative
  * refcount makes slab_unmergeable() refuse it as a merge target.
  */
 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
 sizeof(struct kmem_cache_node), GFP_NOWAIT);
 kmalloc_caches[0].refcount = -1;
 caches++;

 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

 /* From here on slab_state is at least PARTIAL. */
 slab_state = PARTIAL;

 /* The two non power-of-two caches live in slots 1 and 2. */
 if (KMALLOC_MIN_SIZE <= 32) {
 create_kmalloc_cache(&kmalloc_caches[1],
 "kmalloc-96", 96, GFP_NOWAIT);
 caches++;
 }
 if (KMALLOC_MIN_SIZE <= 64) {
 create_kmalloc_cache(&kmalloc_caches[2],
 "kmalloc-192", 192, GFP_NOWAIT);
 caches++;
 }

 /*
  * Power-of-two caches: slot i serves objects of 1 << i bytes. They get
  * the placeholder name "kmalloc" here and their final name further down.
  */
 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
 create_kmalloc_cache(&kmalloc_caches[i],
 "kmalloc", 1 << i, GFP_NOWAIT);
 caches++;
 }

 /*
  * Patch up size_index[] for configurations whose minimum kmalloc size
  * is larger than the smallest entries of the table.
  *
  * The checks below require KMALLOC_MIN_SIZE to be a power of two that
  * does not exceed 256.
  */
 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

 /* Every size below the minimum is served by the smallest cache. */
 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
 int elem = size_index_elem(i);
 if (elem >= ARRAY_SIZE(size_index))
 break;
 size_index[elem] = KMALLOC_SHIFT_LOW;
 }

 if (KMALLOC_MIN_SIZE == 64) {
 /*
  * With a 64 byte minimum the 96 byte cache was not created above,
  * so sizes 72..96 are redirected to slot 7 (the 128 byte cache).
  */
 for (i = 64 + 8; i <= 96; i += 8)
 size_index[size_index_elem(i)] = 7;
 } else if (KMALLOC_MIN_SIZE == 128) {
 /*
  * With a 128 byte minimum the 192 byte cache was not created
  * above either, so sizes 136..192 are redirected to slot 8
  * (the 256 byte cache).
  */
 for (i = 128 + 8; i <= 192; i += 8)
 size_index[size_index_elem(i)] = 8;
 }

 slab_state = UP;

 /* Give the power-of-two caches their final "kmalloc-<size>" names. */
 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);

 /* A missing name is treated as fatal. */
 BUG_ON(!s);
 kmalloc_caches[i].name = s;
 }

#ifdef CONFIG_SMP
 register_cpu_notifier(&slab_notifier);
#endif
#ifdef CONFIG_NUMA
 /* Size struct kmem_cache so that node[] has nr_node_ids entries. */
 kmem_size = offsetof(struct kmem_cache, node) +
 nr_node_ids * sizeof(struct kmem_cache_node *);
#else
 kmem_size = sizeof(struct kmem_cache);
#endif

 /* One-line summary of the resulting configuration. */
 printk(KERN_INFO
 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
 " CPUs=%d, Nodes=%d\n",
 caches, cache_line_size(),
 slub_min_order, slub_max_order, slub_min_objects,
 nr_cpu_ids, nr_node_ids);
}
3144
3145void __init kmem_cache_init_late(void)
3146{
3147}
3148
3149
3150
3151
3152static int slab_unmergeable(struct kmem_cache *s)
3153{
3154 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3155 return 1;
3156
3157 if (s->ctor)
3158 return 1;
3159
3160
3161
3162
3163 if (s->refcount < 0)
3164 return 1;
3165
3166 return 0;
3167}
3168
3169static struct kmem_cache *find_mergeable(size_t size,
3170 size_t align, unsigned long flags, const char *name,
3171 void (*ctor)(void *))
3172{
3173 struct kmem_cache *s;
3174
3175 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3176 return NULL;
3177
3178 if (ctor)
3179 return NULL;
3180
3181 size = ALIGN(size, sizeof(void *));
3182 align = calculate_alignment(flags, align, size);
3183 size = ALIGN(size, align);
3184 flags = kmem_cache_flags(size, flags, name, NULL);
3185
3186 list_for_each_entry(s, &slab_caches, list) {
3187 if (slab_unmergeable(s))
3188 continue;
3189
3190 if (size > s->size)
3191 continue;
3192
3193 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3194 continue;
3195
3196
3197
3198
3199 if ((s->size & ~(align - 1)) != s->size)
3200 continue;
3201
3202 if (s->size - size >= sizeof(void *))
3203 continue;
3204
3205 return s;
3206 }
3207 return NULL;
3208}
3209
3210struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3211 size_t align, unsigned long flags, void (*ctor)(void *))
3212{
3213 struct kmem_cache *s;
3214
3215 if (WARN_ON(!name))
3216 return NULL;
3217
3218 down_write(&slub_lock);
3219 s = find_mergeable(size, align, flags, name, ctor);
3220 if (s) {
3221 s->refcount++;
3222
3223
3224
3225
3226 s->objsize = max(s->objsize, (int)size);
3227 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3228
3229 if (sysfs_slab_alias(s, name)) {
3230 s->refcount--;
3231 goto err;
3232 }
3233 up_write(&slub_lock);
3234 return s;
3235 }
3236
3237 s = kmalloc(kmem_size, GFP_KERNEL);
3238 if (s) {
3239 if (kmem_cache_open(s, GFP_KERNEL, name,
3240 size, align, flags, ctor)) {
3241 list_add(&s->list, &slab_caches);
3242 if (sysfs_slab_add(s)) {
3243 list_del(&s->list);
3244 kfree(s);
3245 goto err;
3246 }
3247 up_write(&slub_lock);
3248 return s;
3249 }
3250 kfree(s);
3251 }
3252 up_write(&slub_lock);
3253
3254err:
3255 if (flags & SLAB_PANIC)
3256 panic("Cannot create slabcache %s\n", name);
3257 else
3258 s = NULL;
3259 return s;
3260}
3261EXPORT_SYMBOL(kmem_cache_create);
3262
3263#ifdef CONFIG_SMP
3264
3265
3266
3267
3268static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3269 unsigned long action, void *hcpu)
3270{
3271 long cpu = (long)hcpu;
3272 struct kmem_cache *s;
3273 unsigned long flags;
3274
3275 switch (action) {
3276 case CPU_UP_CANCELED:
3277 case CPU_UP_CANCELED_FROZEN:
3278 case CPU_DEAD:
3279 case CPU_DEAD_FROZEN:
3280 down_read(&slub_lock);
3281 list_for_each_entry(s, &slab_caches, list) {
3282 local_irq_save(flags);
3283 __flush_cpu_slab(s, cpu);
3284 local_irq_restore(flags);
3285 }
3286 up_read(&slub_lock);
3287 break;
3288 default:
3289 break;
3290 }
3291 return NOTIFY_OK;
3292}
3293
/* CPU hotplug notifier block; events are handled by slab_cpuup_callback(). */
static struct notifier_block __cpuinitdata slab_notifier = {
 .notifier_call = slab_cpuup_callback
};
3297
3298#endif
3299
3300void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3301{
3302 struct kmem_cache *s;
3303 void *ret;
3304
3305 if (unlikely(size > SLUB_MAX_SIZE))
3306 return kmalloc_large(size, gfpflags);
3307
3308 s = get_slab(size, gfpflags);
3309
3310 if (unlikely(ZERO_OR_NULL_PTR(s)))
3311 return s;
3312
3313 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
3314
3315
3316 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3317
3318 return ret;
3319}
3320
3321void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3322 int node, unsigned long caller)
3323{
3324 struct kmem_cache *s;
3325 void *ret;
3326
3327 if (unlikely(size > SLUB_MAX_SIZE)) {
3328 ret = kmalloc_large_node(size, gfpflags, node);
3329
3330 trace_kmalloc_node(caller, ret,
3331 size, PAGE_SIZE << get_order(size),
3332 gfpflags, node);
3333
3334 return ret;
3335 }
3336
3337 s = get_slab(size, gfpflags);
3338
3339 if (unlikely(ZERO_OR_NULL_PTR(s)))
3340 return s;
3341
3342 ret = slab_alloc(s, gfpflags, node, caller);
3343
3344
3345 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3346
3347 return ret;
3348}
3349
3350#ifdef CONFIG_SLUB_DEBUG
3351static int count_inuse(struct page *page)
3352{
3353 return page->inuse;
3354}
3355
3356static int count_total(struct page *page)
3357{
3358 return page->objects;
3359}
3360
3361static int validate_slab(struct kmem_cache *s, struct page *page,
3362 unsigned long *map)
3363{
3364 void *p;
3365 void *addr = page_address(page);
3366
3367 if (!check_slab(s, page) ||
3368 !on_freelist(s, page, NULL))
3369 return 0;
3370
3371
3372 bitmap_zero(map, page->objects);
3373
3374 for_each_free_object(p, s, page->freelist) {
3375 set_bit(slab_index(p, s, addr), map);
3376 if (!check_object(s, page, p, 0))
3377 return 0;
3378 }
3379
3380 for_each_object(p, s, addr, page->objects)
3381 if (!test_bit(slab_index(p, s, addr), map))
3382 if (!check_object(s, page, p, 1))
3383 return 0;
3384 return 1;
3385}
3386
3387static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3388 unsigned long *map)
3389{
3390 if (slab_trylock(page)) {
3391 validate_slab(s, page, map);
3392 slab_unlock(page);
3393 } else
3394 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3395 s->name, page);
3396}
3397
3398static int validate_slab_node(struct kmem_cache *s,
3399 struct kmem_cache_node *n, unsigned long *map)
3400{
3401 unsigned long count = 0;
3402 struct page *page;
3403 unsigned long flags;
3404
3405 spin_lock_irqsave(&n->list_lock, flags);
3406
3407 list_for_each_entry(page, &n->partial, lru) {
3408 validate_slab_slab(s, page, map);
3409 count++;
3410 }
3411 if (count != n->nr_partial)
3412 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
3413 "counter=%ld\n", s->name, count, n->nr_partial);
3414
3415 if (!(s->flags & SLAB_STORE_USER))
3416 goto out;
3417
3418 list_for_each_entry(page, &n->full, lru) {
3419 validate_slab_slab(s, page, map);
3420 count++;
3421 }
3422 if (count != atomic_long_read(&n->nr_slabs))
3423 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
3424 "counter=%ld\n", s->name, count,
3425 atomic_long_read(&n->nr_slabs));
3426
3427out:
3428 spin_unlock_irqrestore(&n->list_lock, flags);
3429 return count;
3430}
3431
3432static long validate_slab_cache(struct kmem_cache *s)
3433{
3434 int node;
3435 unsigned long count = 0;
3436 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3437 sizeof(unsigned long), GFP_KERNEL);
3438
3439 if (!map)
3440 return -ENOMEM;
3441
3442 flush_all(s);
3443 for_each_node_state(node, N_NORMAL_MEMORY) {
3444 struct kmem_cache_node *n = get_node(s, node);
3445
3446 count += validate_slab_node(s, n, map);
3447 }
3448 kfree(map);
3449 return count;
3450}
3451
#ifdef SLUB_RESILIENCY_TEST
/*
 * resiliency_test - deliberately corrupt kmalloc objects and validate.
 *
 * Each step allocates from one kmalloc cache, writes outside the object or
 * into an already freed object on purpose, and then runs
 * validate_slab_cache() on the corresponding kmalloc_caches[] slot.
 * The out-of-bounds and use-after-free writes are the point of the test.
 * Only compiled when SLUB_RESILIENCY_TEST is defined.
 */
static void resiliency_test(void)
{
	u8 *p;

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	/* A1: one byte past the end of a 16 byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches + 4);

	/* A2: one pointer past the end of a 32 byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches + 5);

	/* A3: a byte at a cycle-counter dependent offset past the object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches + 6);

	printk(KERN_ERR "\nB. Corruption after free\n");

	/* B1: first byte of a freed 128 byte object. */
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 7);

	/* B2: byte 50 of a freed 256 byte object. */
	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
	validate_slab_cache(kmalloc_caches + 8);

	/* B3: one byte past the end of a freed 512 byte object. */
	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 9);
}
#else
/* Stub used when the resiliency test is compiled out. */
static void resiliency_test(void) {}
#endif
3509
3510
3511
3512
3513
3514
/*
 * One distinct call site found while scanning track records, together with
 * the statistics gathered for it by add_location().
 */
struct location {
 /* Number of track records seen for this address. */
 unsigned long count;
 /* Address taken from track->addr; the table is searched by this key. */
 unsigned long addr;
 /* Sum, minimum and maximum of the age (jiffies - track->when). */
 long long sum_time;
 long min_time;
 long max_time;
 /* Smallest and largest pid seen in the track records. */
 long min_pid;
 long max_pid;
 /* Cpus recorded in the track records. */
 DECLARE_BITMAP(cpus, NR_CPUS);
 /* Nodes of the pages that hold the track records. */
 nodemask_t nodes;
};
3526
/*
 * Growable table of struct location entries.
 * NOTE: list_locations() initialises this positionally ({ 0, 0, NULL }),
 * so the field order must not change.
 */
struct loc_track {
 /* Number of entries the loc array was allocated for. */
 unsigned long max;
 /* Number of entries currently in use. */
 unsigned long count;
 /* Entry array, kept ordered by ascending addr by add_location(). */
 struct location *loc;
};
3532
3533static void free_loc_track(struct loc_track *t)
3534{
3535 if (t->max)
3536 free_pages((unsigned long)t->loc,
3537 get_order(sizeof(struct location) * t->max));
3538}
3539
3540static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
3541{
3542 struct location *l;
3543 int order;
3544
3545 order = get_order(sizeof(struct location) * max);
3546
3547 l = (void *)__get_free_pages(flags, order);
3548 if (!l)
3549 return 0;
3550
3551 if (t->count) {
3552 memcpy(l, t->loc, sizeof(struct location) * t->count);
3553 free_loc_track(t);
3554 }
3555 t->max = max;
3556 t->loc = l;
3557 return 1;
3558}
3559
/*
 * add_location - account one track record in a location table.
 * @t:     table to update; entries are kept ordered by ascending addr
 * @s:     cache the record belongs to (not used by this function)
 * @track: track record to account
 *
 * If an entry for track->addr already exists its statistics are updated,
 * otherwise a new entry is inserted at the sorted position, growing the
 * table when it is full.
 *
 * Returns 1 on success, 0 if the table had to grow and the allocation
 * failed.
 */
static int add_location(struct loc_track *t, struct kmem_cache *s,
 const struct track *track)
{
 long start, end, pos;
 struct location *l;
 unsigned long caddr;
 unsigned long age = jiffies - track->when;

 /* Binary search over the open interval (start, end). */
 start = -1;
 end = t->count;

 for ( ; ; ) {
 pos = start + (end - start + 1) / 2;

 /*
  * Once the interval is empty pos equals end: there is no exact
  * match and pos is the index where a new entry belongs.
  */
 if (pos == end)
 break;

 caddr = t->loc[pos].addr;
 if (track->addr == caddr) {

 /* Existing entry: fold this record into its statistics. */
 l = &t->loc[pos];
 l->count++;
 /* Time, pid and cpu data are merged only when track->when is set. */
 if (track->when) {
 l->sum_time += age;
 if (age < l->min_time)
 l->min_time = age;
 if (age > l->max_time)
 l->max_time = age;

 if (track->pid < l->min_pid)
 l->min_pid = track->pid;
 if (track->pid > l->max_pid)
 l->max_pid = track->pid;

 cpumask_set_cpu(track->cpu,
 to_cpumask(l->cpus));
 }
 /* The node is that of the page holding the track record itself. */
 node_set(page_to_nid(virt_to_page(track)), l->nodes);
 return 1;
 }

 if (track->addr < caddr)
 end = pos;
 else
 start = pos;
 }

 /*
  * No entry found: insert a new one. Double the capacity first if the
  * table is full; GFP_ATOMIC is used for that allocation.
  */
 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
 return 0;

 /* Computed after the possible reallocation, which replaces t->loc. */
 l = t->loc + pos;
 /* Shift the tail up by one to make room at pos. */
 if (pos < t->count)
 memmove(l + 1, l,
 (t->count - pos) * sizeof(struct location));
 t->count++;
 l->count = 1;
 l->addr = track->addr;
 l->sum_time = age;
 l->min_time = age;
 l->max_time = age;
 l->min_pid = track->pid;
 l->max_pid = track->pid;
 cpumask_clear(to_cpumask(l->cpus));
 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
 nodes_clear(l->nodes);
 node_set(page_to_nid(virt_to_page(track)), l->nodes);
 return 1;
}
3635
3636static void process_slab(struct loc_track *t, struct kmem_cache *s,
3637 struct page *page, enum track_item alloc,
3638 long *map)
3639{
3640 void *addr = page_address(page);
3641 void *p;
3642
3643 bitmap_zero(map, page->objects);
3644 for_each_free_object(p, s, page->freelist)
3645 set_bit(slab_index(p, s, addr), map);
3646
3647 for_each_object(p, s, addr, page->objects)
3648 if (!test_bit(slab_index(p, s, addr), map))
3649 add_location(t, s, get_track(s, p, alloc));
3650}
3651
3652static int list_locations(struct kmem_cache *s, char *buf,
3653 enum track_item alloc)
3654{
3655 int len = 0;
3656 unsigned long i;
3657 struct loc_track t = { 0, 0, NULL };
3658 int node;
3659 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3660 sizeof(unsigned long), GFP_KERNEL);
3661
3662 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
3663 GFP_TEMPORARY)) {
3664 kfree(map);
3665 return sprintf(buf, "Out of memory\n");
3666 }
3667
3668 flush_all(s);
3669
3670 for_each_node_state(node, N_NORMAL_MEMORY) {
3671 struct kmem_cache_node *n = get_node(s, node);
3672 unsigned long flags;
3673 struct page *page;
3674
3675 if (!atomic_long_read(&n->nr_slabs))
3676 continue;
3677
3678 spin_lock_irqsave(&n->list_lock, flags);
3679 list_for_each_entry(page, &n->partial, lru)
3680 process_slab(&t, s, page, alloc, map);
3681 list_for_each_entry(page, &n->full, lru)
3682 process_slab(&t, s, page, alloc, map);
3683 spin_unlock_irqrestore(&n->list_lock, flags);
3684 }
3685
3686 for (i = 0; i < t.count; i++) {
3687 struct location *l = &t.loc[i];
3688
3689 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
3690 break;
3691 len += sprintf(buf + len, "%7ld ", l->count);
3692
3693 if (l->addr)
3694 len += sprint_symbol(buf + len, (unsigned long)l->addr);
3695 else
3696 len += sprintf(buf + len, "<not-available>");
3697
3698 if (l->sum_time != l->min_time) {
3699 len += sprintf(buf + len, " age=%ld/%ld/%ld",
3700 l->min_time,
3701 (long)div_u64(l->sum_time, l->count),
3702 l->max_time);
3703 } else
3704 len += sprintf(buf + len, " age=%ld",
3705 l->min_time);
3706
3707 if (l->min_pid != l->max_pid)
3708 len += sprintf(buf + len, " pid=%ld-%ld",
3709 l->min_pid, l->max_pid);
3710 else
3711 len += sprintf(buf + len, " pid=%ld",
3712 l->min_pid);
3713
3714 if (num_online_cpus() > 1 &&
3715 !cpumask_empty(to_cpumask(l->cpus)) &&
3716 len < PAGE_SIZE - 60) {
3717 len += sprintf(buf + len, " cpus=");
3718 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
3719 to_cpumask(l->cpus));
3720 }
3721
3722 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
3723 len < PAGE_SIZE - 60) {
3724 len += sprintf(buf + len, " nodes=");
3725 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
3726 l->nodes);
3727 }
3728
3729 len += sprintf(buf + len, "\n");
3730 }
3731
3732 free_loc_track(&t);
3733 kfree(map);
3734 if (!t.count)
3735 len += sprintf(buf, "No data\n");
3736 return len;
3737}
3738
/*
 * Bit positions selecting what show_slab_objects() counts. The SO_* masks
 * below are the values actually passed around.
 */
enum slab_stat_type {
	SL_ALL = 0,		/* all slabs, from the per-node counters */
	SL_PARTIAL = 1,		/* slabs on the partial lists */
	SL_CPU = 2,		/* per-cpu slabs */
	SL_OBJECTS = 3,		/* count objects in use instead of slabs */
	SL_TOTAL = 4		/* count total objects instead of slabs */
};

#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
3752
3753static ssize_t show_slab_objects(struct kmem_cache *s,
3754 char *buf, unsigned long flags)
3755{
3756 unsigned long total = 0;
3757 int node;
3758 int x;
3759 unsigned long *nodes;
3760 unsigned long *per_cpu;
3761
3762 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
3763 if (!nodes)
3764 return -ENOMEM;
3765 per_cpu = nodes + nr_node_ids;
3766
3767 if (flags & SO_CPU) {
3768 int cpu;
3769
3770 for_each_possible_cpu(cpu) {
3771 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
3772
3773 if (!c || c->node < 0)
3774 continue;
3775
3776 if (c->page) {
3777 if (flags & SO_TOTAL)
3778 x = c->page->objects;
3779 else if (flags & SO_OBJECTS)
3780 x = c->page->inuse;
3781 else
3782 x = 1;
3783
3784 total += x;
3785 nodes[c->node] += x;
3786 }
3787 per_cpu[c->node]++;
3788 }
3789 }
3790
3791 if (flags & SO_ALL) {
3792 for_each_node_state(node, N_NORMAL_MEMORY) {
3793 struct kmem_cache_node *n = get_node(s, node);
3794
3795 if (flags & SO_TOTAL)
3796 x = atomic_long_read(&n->total_objects);
3797 else if (flags & SO_OBJECTS)
3798 x = atomic_long_read(&n->total_objects) -
3799 count_partial(n, count_free);
3800
3801 else
3802 x = atomic_long_read(&n->nr_slabs);
3803 total += x;
3804 nodes[node] += x;
3805 }
3806
3807 } else if (flags & SO_PARTIAL) {
3808 for_each_node_state(node, N_NORMAL_MEMORY) {
3809 struct kmem_cache_node *n = get_node(s, node);
3810
3811 if (flags & SO_TOTAL)
3812 x = count_partial(n, count_total);
3813 else if (flags & SO_OBJECTS)
3814 x = count_partial(n, count_inuse);
3815 else
3816 x = n->nr_partial;
3817 total += x;
3818 nodes[node] += x;
3819 }
3820 }
3821 x = sprintf(buf, "%lu", total);
3822#ifdef CONFIG_NUMA
3823 for_each_node_state(node, N_NORMAL_MEMORY)
3824 if (nodes[node])
3825 x += sprintf(buf + x, " N%d=%lu",
3826 node, nodes[node]);
3827#endif
3828 kfree(nodes);
3829 return x + sprintf(buf + x, "\n");
3830}
3831
3832static int any_slab_objects(struct kmem_cache *s)
3833{
3834 int node;
3835
3836 for_each_online_node(node) {
3837 struct kmem_cache_node *n = get_node(s, node);
3838
3839 if (!n)
3840 continue;
3841
3842 if (atomic_long_read(&n->total_objects))
3843 return 1;
3844 }
3845 return 0;
3846}
3847
/*
 * Map an embedded member back to its container:
 *  to_slab_attr() - struct attribute (member attr) -> struct slab_attribute
 *  to_slab()      - struct kobject (member kobj)   -> struct kmem_cache
 *
 * Neither macro ends in a semicolon, so both can be used inside larger
 * expressions as well as in plain statements.
 */
#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
3850
/*
 * A sysfs attribute of a slab cache together with its accessors.
 * slab_attr_show()/slab_attr_store() return -EIO when the respective
 * callback is missing.
 */
struct slab_attribute {
 /* Generic attribute embedded for sysfs; see to_slab_attr(). */
 struct attribute attr;
 /* Formats the value for cache s into buf. */
 ssize_t (*show)(struct kmem_cache *s, char *buf);
 /* Parses count bytes at x and applies them to cache s. */
 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};
3856
/*
 * Define the read-only attribute object <name>_attr via __ATTR_RO().
 * NOTE(review): __ATTR_RO is expected to pick up <name>_show - confirm
 * against its definition.
 */
#define SLAB_ATTR_RO(_name) \
 static struct slab_attribute _name##_attr = __ATTR_RO(_name)

/*
 * Define the attribute object <name>_attr with mode 0644, using
 * <name>_show and <name>_store as accessors.
 */
#define SLAB_ATTR(_name) \
 static struct slab_attribute _name##_attr = \
 __ATTR(_name, 0644, _name##_show, _name##_store)
3863
3864static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
3865{
3866 return sprintf(buf, "%d\n", s->size);
3867}
3868SLAB_ATTR_RO(slab_size);
3869
3870static ssize_t align_show(struct kmem_cache *s, char *buf)
3871{
3872 return sprintf(buf, "%d\n", s->align);
3873}
3874SLAB_ATTR_RO(align);
3875
3876static ssize_t object_size_show(struct kmem_cache *s, char *buf)
3877{
3878 return sprintf(buf, "%d\n", s->objsize);
3879}
3880SLAB_ATTR_RO(object_size);
3881
3882static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
3883{
3884 return sprintf(buf, "%d\n", oo_objects(s->oo));
3885}
3886SLAB_ATTR_RO(objs_per_slab);
3887
3888static ssize_t order_store(struct kmem_cache *s,
3889 const char *buf, size_t length)
3890{
3891 unsigned long order;
3892 int err;
3893
3894 err = strict_strtoul(buf, 10, &order);
3895 if (err)
3896 return err;
3897
3898 if (order > slub_max_order || order < slub_min_order)
3899 return -EINVAL;
3900
3901 calculate_sizes(s, order);
3902 return length;
3903}
3904
3905static ssize_t order_show(struct kmem_cache *s, char *buf)
3906{
3907 return sprintf(buf, "%d\n", oo_order(s->oo));
3908}
3909SLAB_ATTR(order);
3910
3911static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
3912{
3913 return sprintf(buf, "%lu\n", s->min_partial);
3914}
3915
3916static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
3917 size_t length)
3918{
3919 unsigned long min;
3920 int err;
3921
3922 err = strict_strtoul(buf, 10, &min);
3923 if (err)
3924 return err;
3925
3926 set_min_partial(s, min);
3927 return length;
3928}
3929SLAB_ATTR(min_partial);
3930
3931static ssize_t ctor_show(struct kmem_cache *s, char *buf)
3932{
3933 if (s->ctor) {
3934 int n = sprint_symbol(buf, (unsigned long)s->ctor);
3935
3936 return n + sprintf(buf + n, "\n");
3937 }
3938 return 0;
3939}
3940SLAB_ATTR_RO(ctor);
3941
3942static ssize_t aliases_show(struct kmem_cache *s, char *buf)
3943{
3944 return sprintf(buf, "%d\n", s->refcount - 1);
3945}
3946SLAB_ATTR_RO(aliases);
3947
3948static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3949{
3950 return show_slab_objects(s, buf, SO_ALL);
3951}
3952SLAB_ATTR_RO(slabs);
3953
3954static ssize_t partial_show(struct kmem_cache *s, char *buf)
3955{
3956 return show_slab_objects(s, buf, SO_PARTIAL);
3957}
3958SLAB_ATTR_RO(partial);
3959
3960static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
3961{
3962 return show_slab_objects(s, buf, SO_CPU);
3963}
3964SLAB_ATTR_RO(cpu_slabs);
3965
3966static ssize_t objects_show(struct kmem_cache *s, char *buf)
3967{
3968 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
3969}
3970SLAB_ATTR_RO(objects);
3971
3972static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
3973{
3974 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
3975}
3976SLAB_ATTR_RO(objects_partial);
3977
3978static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
3979{
3980 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
3981}
3982SLAB_ATTR_RO(total_objects);
3983
3984static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
3985{
3986 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
3987}
3988
3989static ssize_t sanity_checks_store(struct kmem_cache *s,
3990 const char *buf, size_t length)
3991{
3992 s->flags &= ~SLAB_DEBUG_FREE;
3993 if (buf[0] == '1')
3994 s->flags |= SLAB_DEBUG_FREE;
3995 return length;
3996}
3997SLAB_ATTR(sanity_checks);
3998
3999static ssize_t trace_show(struct kmem_cache *s, char *buf)
4000{
4001 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4002}
4003
4004static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4005 size_t length)
4006{
4007 s->flags &= ~SLAB_TRACE;
4008 if (buf[0] == '1')
4009 s->flags |= SLAB_TRACE;
4010 return length;
4011}
4012SLAB_ATTR(trace);
4013
4014#ifdef CONFIG_FAILSLAB
4015static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4016{
4017 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4018}
4019
4020static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4021 size_t length)
4022{
4023 s->flags &= ~SLAB_FAILSLAB;
4024 if (buf[0] == '1')
4025 s->flags |= SLAB_FAILSLAB;
4026 return length;
4027}
4028SLAB_ATTR(failslab);
4029#endif
4030
4031static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4032{
4033 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4034}
4035
4036static ssize_t reclaim_account_store(struct kmem_cache *s,
4037 const char *buf, size_t length)
4038{
4039 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4040 if (buf[0] == '1')
4041 s->flags |= SLAB_RECLAIM_ACCOUNT;
4042 return length;
4043}
4044SLAB_ATTR(reclaim_account);
4045
4046static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4047{
4048 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4049}
4050SLAB_ATTR_RO(hwcache_align);
4051
4052#ifdef CONFIG_ZONE_DMA
4053static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4054{
4055 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4056}
4057SLAB_ATTR_RO(cache_dma);
4058#endif
4059
4060static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4061{
4062 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4063}
4064SLAB_ATTR_RO(destroy_by_rcu);
4065
4066static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4067{
4068 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4069}
4070
4071static ssize_t red_zone_store(struct kmem_cache *s,
4072 const char *buf, size_t length)
4073{
4074 if (any_slab_objects(s))
4075 return -EBUSY;
4076
4077 s->flags &= ~SLAB_RED_ZONE;
4078 if (buf[0] == '1')
4079 s->flags |= SLAB_RED_ZONE;
4080 calculate_sizes(s, -1);
4081 return length;
4082}
4083SLAB_ATTR(red_zone);
4084
4085static ssize_t poison_show(struct kmem_cache *s, char *buf)
4086{
4087 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4088}
4089
4090static ssize_t poison_store(struct kmem_cache *s,
4091 const char *buf, size_t length)
4092{
4093 if (any_slab_objects(s))
4094 return -EBUSY;
4095
4096 s->flags &= ~SLAB_POISON;
4097 if (buf[0] == '1')
4098 s->flags |= SLAB_POISON;
4099 calculate_sizes(s, -1);
4100 return length;
4101}
4102SLAB_ATTR(poison);
4103
4104static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4105{
4106 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4107}
4108
4109static ssize_t store_user_store(struct kmem_cache *s,
4110 const char *buf, size_t length)
4111{
4112 if (any_slab_objects(s))
4113 return -EBUSY;
4114
4115 s->flags &= ~SLAB_STORE_USER;
4116 if (buf[0] == '1')
4117 s->flags |= SLAB_STORE_USER;
4118 calculate_sizes(s, -1);
4119 return length;
4120}
4121SLAB_ATTR(store_user);
4122
4123static ssize_t validate_show(struct kmem_cache *s, char *buf)
4124{
4125 return 0;
4126}
4127
4128static ssize_t validate_store(struct kmem_cache *s,
4129 const char *buf, size_t length)
4130{
4131 int ret = -EINVAL;
4132
4133 if (buf[0] == '1') {
4134 ret = validate_slab_cache(s);
4135 if (ret >= 0)
4136 ret = length;
4137 }
4138 return ret;
4139}
4140SLAB_ATTR(validate);
4141
4142static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4143{
4144 return 0;
4145}
4146
4147static ssize_t shrink_store(struct kmem_cache *s,
4148 const char *buf, size_t length)
4149{
4150 if (buf[0] == '1') {
4151 int rc = kmem_cache_shrink(s);
4152
4153 if (rc)
4154 return rc;
4155 } else
4156 return -EINVAL;
4157 return length;
4158}
4159SLAB_ATTR(shrink);
4160
4161static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4162{
4163 if (!(s->flags & SLAB_STORE_USER))
4164 return -ENOSYS;
4165 return list_locations(s, buf, TRACK_ALLOC);
4166}
4167SLAB_ATTR_RO(alloc_calls);
4168
4169static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4170{
4171 if (!(s->flags & SLAB_STORE_USER))
4172 return -ENOSYS;
4173 return list_locations(s, buf, TRACK_FREE);
4174}
4175SLAB_ATTR_RO(free_calls);
4176
4177#ifdef CONFIG_NUMA
4178static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4179{
4180 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4181}
4182
4183static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4184 const char *buf, size_t length)
4185{
4186 unsigned long ratio;
4187 int err;
4188
4189 err = strict_strtoul(buf, 10, &ratio);
4190 if (err)
4191 return err;
4192
4193 if (ratio <= 100)
4194 s->remote_node_defrag_ratio = ratio * 10;
4195
4196 return length;
4197}
4198SLAB_ATTR(remote_node_defrag_ratio);
4199#endif
4200
4201#ifdef CONFIG_SLUB_STATS
4202static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4203{
4204 unsigned long sum = 0;
4205 int cpu;
4206 int len;
4207 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4208
4209 if (!data)
4210 return -ENOMEM;
4211
4212 for_each_online_cpu(cpu) {
4213 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4214
4215 data[cpu] = x;
4216 sum += x;
4217 }
4218
4219 len = sprintf(buf, "%lu", sum);
4220
4221#ifdef CONFIG_SMP
4222 for_each_online_cpu(cpu) {
4223 if (data[cpu] && len < PAGE_SIZE - 20)
4224 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4225 }
4226#endif
4227 kfree(data);
4228 return len + sprintf(buf + len, "\n");
4229}
4230
4231static void clear_stat(struct kmem_cache *s, enum stat_item si)
4232{
4233 int cpu;
4234
4235 for_each_online_cpu(cpu)
4236 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
4237}
4238
/*
 * STAT_ATTR(si, text) - generate the sysfs attribute for one counter.
 *
 * Expands to text_show(), which prints counter si via show_stat(),
 * text_store(), which resets the counter via clear_stat() when the input
 * starts with '0' and returns -EINVAL otherwise, and the SLAB_ATTR(text)
 * attribute object. The expansion ends with a line continuation, so the
 * blank line after the macro must stay.
 */
#define STAT_ATTR(si, text) \
static ssize_t text##_show(struct kmem_cache *s, char *buf) \
{ \
 return show_stat(s, buf, si); \
} \
static ssize_t text##_store(struct kmem_cache *s, \
 const char *buf, size_t length) \
{ \
 if (buf[0] != '0') \
 return -EINVAL; \
 clear_stat(s, si); \
 return length; \
} \
SLAB_ATTR(text); \

/* One attribute per statistics counter. */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
4272#endif
4273
/*
 * NULL-terminated list of the attributes in slab_attr_group. Entries that
 * exist only under a config option are guarded by the same option here.
 */
static struct attribute *slab_attrs[] = {
 &slab_size_attr.attr,
 &object_size_attr.attr,
 &objs_per_slab_attr.attr,
 &order_attr.attr,
 &min_partial_attr.attr,
 &objects_attr.attr,
 &objects_partial_attr.attr,
 &total_objects_attr.attr,
 &slabs_attr.attr,
 &partial_attr.attr,
 &cpu_slabs_attr.attr,
 &ctor_attr.attr,
 &aliases_attr.attr,
 &align_attr.attr,
 &sanity_checks_attr.attr,
 &trace_attr.attr,
 &hwcache_align_attr.attr,
 &reclaim_account_attr.attr,
 &destroy_by_rcu_attr.attr,
 &red_zone_attr.attr,
 &poison_attr.attr,
 &store_user_attr.attr,
 &validate_attr.attr,
 &shrink_attr.attr,
 &alloc_calls_attr.attr,
 &free_calls_attr.attr,
#ifdef CONFIG_ZONE_DMA
 &cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
 &remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
 /* Statistics counters generated by STAT_ATTR(). */
 &alloc_fastpath_attr.attr,
 &alloc_slowpath_attr.attr,
 &free_fastpath_attr.attr,
 &free_slowpath_attr.attr,
 &free_frozen_attr.attr,
 &free_add_partial_attr.attr,
 &free_remove_partial_attr.attr,
 &alloc_from_partial_attr.attr,
 &alloc_slab_attr.attr,
 &alloc_refill_attr.attr,
 &free_slab_attr.attr,
 &cpuslab_flush_attr.attr,
 &deactivate_full_attr.attr,
 &deactivate_empty_attr.attr,
 &deactivate_to_head_attr.attr,
 &deactivate_to_tail_attr.attr,
 &deactivate_remote_frees_attr.attr,
 &order_fallback_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
 &failslab_attr.attr,
#endif

 /* Terminator. */
 NULL
};
4333
/* Attribute group created for each cache kobject by sysfs_slab_add(). */
static struct attribute_group slab_attr_group = {
 .attrs = slab_attrs,
};
4337
4338static ssize_t slab_attr_show(struct kobject *kobj,
4339 struct attribute *attr,
4340 char *buf)
4341{
4342 struct slab_attribute *attribute;
4343 struct kmem_cache *s;
4344 int err;
4345
4346 attribute = to_slab_attr(attr);
4347 s = to_slab(kobj);
4348
4349 if (!attribute->show)
4350 return -EIO;
4351
4352 err = attribute->show(s, buf);
4353
4354 return err;
4355}
4356
4357static ssize_t slab_attr_store(struct kobject *kobj,
4358 struct attribute *attr,
4359 const char *buf, size_t len)
4360{
4361 struct slab_attribute *attribute;
4362 struct kmem_cache *s;
4363 int err;
4364
4365 attribute = to_slab_attr(attr);
4366 s = to_slab(kobj);
4367
4368 if (!attribute->store)
4369 return -EIO;
4370
4371 err = attribute->store(s, buf, len);
4372
4373 return err;
4374}
4375
/*
 * kmem_cache_release - kobject release callback of slab_ktype.
 *
 * Frees the struct kmem_cache that embeds @kobj.
 */
static void kmem_cache_release(struct kobject *kobj)
{
	struct kmem_cache *cache;

	cache = to_slab(kobj);
	kfree(cache);
}
4382
/* sysfs operations shared by all slab attributes. */
static const struct sysfs_ops slab_sysfs_ops = {
 .show = slab_attr_show,
 .store = slab_attr_store,
};
4387
/*
 * kobject type of slab caches. The release callback frees the embedding
 * struct kmem_cache.
 */
static struct kobj_type slab_ktype = {
 .sysfs_ops = &slab_sysfs_ops,
 .release = kmem_cache_release
};
4392
4393static int uevent_filter(struct kset *kset, struct kobject *kobj)
4394{
4395 struct kobj_type *ktype = get_ktype(kobj);
4396
4397 if (ktype == &slab_ktype)
4398 return 1;
4399 return 0;
4400}
4401
/* uevent operations for the slab kset; only the filter is provided. */
static const struct kset_uevent_ops slab_uevent_ops = {
 .filter = uevent_filter,
};
4405
/* kset that the cache kobjects and the alias links are created under. */
static struct kset *slab_kset;

/* Size in bytes of the buffer allocated by create_unique_id(). */
#define ID_STR_LENGTH 64
4409
4410
4411
4412
4413
4414static char *create_unique_id(struct kmem_cache *s)
4415{
4416 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
4417 char *p = name;
4418
4419 BUG_ON(!name);
4420
4421 *p++ = ':';
4422
4423
4424
4425
4426
4427
4428
4429 if (s->flags & SLAB_CACHE_DMA)
4430 *p++ = 'd';
4431 if (s->flags & SLAB_RECLAIM_ACCOUNT)
4432 *p++ = 'a';
4433 if (s->flags & SLAB_DEBUG_FREE)
4434 *p++ = 'F';
4435 if (!(s->flags & SLAB_NOTRACK))
4436 *p++ = 't';
4437 if (p != name + 1)
4438 *p++ = '-';
4439 p += sprintf(p, "%07d", s->size);
4440 BUG_ON(p > name + ID_STR_LENGTH - 1);
4441 return name;
4442}
4443
4444static int sysfs_slab_add(struct kmem_cache *s)
4445{
4446 int err;
4447 const char *name;
4448 int unmergeable;
4449
4450 if (slab_state < SYSFS)
4451
4452 return 0;
4453
4454 unmergeable = slab_unmergeable(s);
4455 if (unmergeable) {
4456
4457
4458
4459
4460
4461 sysfs_remove_link(&slab_kset->kobj, s->name);
4462 name = s->name;
4463 } else {
4464
4465
4466
4467
4468 name = create_unique_id(s);
4469 }
4470
4471 s->kobj.kset = slab_kset;
4472 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
4473 if (err) {
4474 kobject_put(&s->kobj);
4475 return err;
4476 }
4477
4478 err = sysfs_create_group(&s->kobj, &slab_attr_group);
4479 if (err) {
4480 kobject_del(&s->kobj);
4481 kobject_put(&s->kobj);
4482 return err;
4483 }
4484 kobject_uevent(&s->kobj, KOBJ_ADD);
4485 if (!unmergeable) {
4486
4487 sysfs_slab_alias(s, s->name);
4488 kfree(name);
4489 }
4490 return 0;
4491}
4492
4493static void sysfs_slab_remove(struct kmem_cache *s)
4494{
4495 if (slab_state < SYSFS)
4496
4497
4498
4499
4500 return;
4501
4502 kobject_uevent(&s->kobj, KOBJ_REMOVE);
4503 kobject_del(&s->kobj);
4504 kobject_put(&s->kobj);
4505}
4506
4507
4508
4509
4510
/*
 * An alias request recorded by sysfs_slab_alias() while slab_state is not
 * yet SYSFS. slab_sysfs_init() walks the list, creates the links and frees
 * the entries.
 */
struct saved_alias {
 struct kmem_cache *s; /* cache the alias points to */
 const char *name; /* alias name; the pointer is stored, not copied */
 struct saved_alias *next; /* next pending alias, or NULL */
};

/* Head of the singly linked list of pending aliases (newest first). */
static struct saved_alias *alias_list;
4518
4519static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
4520{
4521 struct saved_alias *al;
4522
4523 if (slab_state == SYSFS) {
4524
4525
4526
4527 sysfs_remove_link(&slab_kset->kobj, name);
4528 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
4529 }
4530
4531 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
4532 if (!al)
4533 return -ENOMEM;
4534
4535 al->s = s;
4536 al->name = name;
4537 al->next = alias_list;
4538 alias_list = al;
4539 return 0;
4540}
4541
4542static int __init slab_sysfs_init(void)
4543{
4544 struct kmem_cache *s;
4545 int err;
4546
4547 down_write(&slub_lock);
4548
4549 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
4550 if (!slab_kset) {
4551 up_write(&slub_lock);
4552 printk(KERN_ERR "Cannot register slab subsystem.\n");
4553 return -ENOSYS;
4554 }
4555
4556 slab_state = SYSFS;
4557
4558 list_for_each_entry(s, &slab_caches, list) {
4559 err = sysfs_slab_add(s);
4560 if (err)
4561 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
4562 " to sysfs\n", s->name);
4563 }
4564
4565 while (alias_list) {
4566 struct saved_alias *al = alias_list;
4567
4568 alias_list = alias_list->next;
4569 err = sysfs_slab_alias(al->s, al->name);
4570 if (err)
4571 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
4572 " %s to sysfs\n", s->name);
4573 kfree(al);
4574 }
4575
4576 up_write(&slub_lock);
4577 resiliency_test();
4578 return 0;
4579}
4580
4581__initcall(slab_sysfs_init);
4582#endif
4583
4584
4585
4586
4587#ifdef CONFIG_SLABINFO
/*
 * Emit the /proc/slabinfo header: the version line followed by a single
 * line naming the per-cache columns.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	/* Header fragments, written in order; NULL terminates the table. */
	static const char * const fragments[] = {
		"slabinfo - version: 2.1\n",
		"# name <active_objs> <num_objs> <objsize> "
			"<objperslab> <pagesperslab>",
		" : tunables <limit> <batchcount> <sharedfactor>",
		" : slabdata <active_slabs> <num_slabs> <sharedavail>",
		NULL
	};
	const char * const *frag;

	for (frag = fragments; *frag; frag++)
		seq_puts(m, *frag);
	seq_putc(m, '\n');
}
4597
4598static void *s_start(struct seq_file *m, loff_t *pos)
4599{
4600 loff_t n = *pos;
4601
4602 down_read(&slub_lock);
4603 if (!n)
4604 print_slabinfo_header(m);
4605
4606 return seq_list_start(&slab_caches, *pos);
4607}
4608
/*
 * seq_file ->next: step from list element @p to the following element of
 * slab_caches via seq_list_next(), which also updates *pos.
 */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
 return seq_list_next(p, &slab_caches, pos);
}
4613
/* seq_file ->stop: release the read lock on slub_lock taken in s_start(). */
static void s_stop(struct seq_file *m, void *p)
{
 up_read(&slub_lock);
}
4618
4619static int s_show(struct seq_file *m, void *p)
4620{
4621 unsigned long nr_partials = 0;
4622 unsigned long nr_slabs = 0;
4623 unsigned long nr_inuse = 0;
4624 unsigned long nr_objs = 0;
4625 unsigned long nr_free = 0;
4626 struct kmem_cache *s;
4627 int node;
4628
4629 s = list_entry(p, struct kmem_cache, list);
4630
4631 for_each_online_node(node) {
4632 struct kmem_cache_node *n = get_node(s, node);
4633
4634 if (!n)
4635 continue;
4636
4637 nr_partials += n->nr_partial;
4638 nr_slabs += atomic_long_read(&n->nr_slabs);
4639 nr_objs += atomic_long_read(&n->total_objects);
4640 nr_free += count_partial(n, count_free);
4641 }
4642
4643 nr_inuse = nr_objs - nr_free;
4644
4645 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
4646 nr_objs, s->size, oo_objects(s->oo),
4647 (1 << oo_order(s->oo)));
4648 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
4649 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
4650 0UL);
4651 seq_putc(m, '\n');
4652 return 0;
4653}
4654
/*
 * seq_file iterator for /proc/slabinfo: walks slab_caches and prints one
 * line per cache. Passed to seq_open() in slabinfo_open().
 */
static const struct seq_operations slabinfo_op = {
 .start = s_start,
 .next = s_next,
 .stop = s_stop,
 .show = s_show,
};
4661
/*
 * ->open handler for /proc/slabinfo: attaches the slabinfo_op iterator to
 * @file and returns seq_open()'s result.
 */
static int slabinfo_open(struct inode *inode, struct file *file)
{
 return seq_open(file, &slabinfo_op);
}
4666
/*
 * File operations for /proc/slabinfo: a custom open plus the generic
 * seq_file read, llseek and release helpers.
 */
static const struct file_operations proc_slabinfo_operations = {
 .open = slabinfo_open,
 .read = seq_read,
 .llseek = seq_lseek,
 .release = seq_release,
};
4673
/*
 * Init hook that creates the read-only (S_IRUGO) "slabinfo" proc entry.
 * Always returns 0.
 *
 * NOTE(review): the return value of proc_create() is not checked, so a
 * failure to create the entry goes unreported.
 */
static int __init slab_proc_init(void)
{
 proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
 return 0;
}
module_init(slab_proc_init);
4680#endif
4681