1
2
3
4
5
6
7
8
9
10
11#include <linux/mm.h>
12#include <linux/swap.h>
13#include <linux/module.h>
14#include <linux/bit_spinlock.h>
15#include <linux/interrupt.h>
16#include <linux/bitops.h>
17#include <linux/slab.h>
18#include <linux/proc_fs.h>
19#include <linux/seq_file.h>
20#include <trace/kmemtrace.h>
21#include <linux/cpu.h>
22#include <linux/cpuset.h>
23#include <linux/mempolicy.h>
24#include <linux/ctype.h>
25#include <linux/debugobjects.h>
26#include <linux/kallsyms.h>
27#include <linux/memory.h>
28#include <linux/math64.h>
29#include <linux/fault-inject.h>
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
 * SLABDEBUG is a compile-time 0/1 mirror of CONFIG_SLUB_DEBUG. It lets
 * debug-only conditions be written as ordinary C expressions, e.g.
 * "SLABDEBUG && PageSlubDebug(page)", instead of #ifdef blocks.
 */
#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG 1
#else
#define SLABDEBUG 0
#endif
114
115
116
117
118
119
120
121
122
123
/*
 * Explicitly leave SLUB_RESILIENCY_TEST undefined.
 * NOTE(review): presumably gates a self-test elsewhere in the file - confirm.
 */
#undef SLUB_RESILIENCY_TEST





/*
 * Lower bound related to partial slabs.
 * NOTE(review): not referenced in this part of the file; presumably feeds
 * the per-cache min_partial value that unfreeze_slab() and
 * get_any_partial() compare against - confirm.
 */
#define MIN_PARTIAL 5






/*
 * Upper bound related to partial slabs.
 * NOTE(review): not referenced in this part of the file - confirm its use.
 */
#define MAX_PARTIAL 10
138
/*
 * Debug flags used for slub_debug when CONFIG_SLUB_DEBUG_ON is set, and
 * by setup_slub_debug() when "slub_debug" is given without an option list.
 */
#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)




/*
 * NOTE(review): judging by the name, caches carrying any of these flags
 * are never merged with another cache; the users of this mask are not in
 * this part of the file - confirm.
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU)

/*
 * NOTE(review): judging by the name, flags that must be equal for two
 * caches to be merged - confirm against the merge code.
 */
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA)
150
/*
 * Default minimum alignments: unless the architecture has already defined
 * its own value, fall back to the alignment of unsigned long long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
158
/*
 * Encoding of struct kmem_cache_order_objects (see oo_make(), oo_order()
 * and oo_objects()): the page order is stored above bit OO_SHIFT and the
 * object count in the low OO_SHIFT bits, so a count can be at most
 * OO_MASK, which equals MAX_OBJS_PER_PAGE (65535).
 */
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 65535

/* Internal flag bits kept in kmem_cache->flags next to the SLAB_* flags. */
/* Tested by init_object()/check_object(): poison the object payload. */
#define __OBJECT_POISON 0x80000000
/* NOTE(review): not used in this part of the file - confirm its meaning. */
#define __SYSFS_ADD_DEFERRED 0x40000000
166
/* Initialised to the compile-time size of struct kmem_cache. */
static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
/* NOTE(review): presumably the CPU hotplug notifier; set up elsewhere. */
static struct notifier_block slab_notifier;
#endif

/*
 * Bring-up state of the allocator, starting at DOWN.
 * slab_is_available() reports true once the state is UP or later.
 * NOTE(review): the transitions happen outside this part of the file.
 */
static enum {
	DOWN,
	PARTIAL,
	UP,
	SYSFS
} slab_state = DOWN;


/*
 * NOTE(review): presumably slub_lock protects the slab_caches list of all
 * caches; neither is used in this part of the file - confirm.
 */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);
183
184
185
186
/*
 * Record of one allocation or free event, stored behind an object when
 * SLAB_STORE_USER is set (see get_track() and set_track()).
 */
struct track {
	unsigned long addr;	/* Caller address passed to set_track() */
	int cpu;		/* smp_processor_id() at the time of the event */
	int pid;		/* current->pid at the time of the event */
	unsigned long when;	/* jiffies at the time of the event */
};

/* Index of the record within the pair kept per object (see get_track()). */
enum track_item { TRACK_ALLOC, TRACK_FREE };
195
#ifdef CONFIG_SLUB_DEBUG
/* sysfs hooks: only declared here, defined outside this part of the file. */
static int sysfs_slab_add(struct kmem_cache *s);
static int sysfs_slab_alias(struct kmem_cache *s, const char *p);
static void sysfs_slab_remove(struct kmem_cache *s);

#else
/* Without CONFIG_SLUB_DEBUG the add/alias hooks succeed without doing work. */
static inline int sysfs_slab_add(struct kmem_cache *s)
{
	return 0;
}

static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{
	return 0;
}

/* In this configuration "removal" simply frees the cache descriptor. */
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s);
}

#endif
211
212static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
213{
214#ifdef CONFIG_SLUB_STATS
215 c->stat[si]++;
216#endif
217}
218
219
220
221
222
223int slab_is_available(void)
224{
225 return slab_state >= UP;
226}
227
228static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
229{
230#ifdef CONFIG_NUMA
231 return s->node[node];
232#else
233 return &s->local_node;
234#endif
235}
236
237static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
238{
239#ifdef CONFIG_SMP
240 return s->cpu_slab[cpu];
241#else
242 return &s->cpu_slab;
243#endif
244}
245
246
247static inline int check_valid_pointer(struct kmem_cache *s,
248 struct page *page, const void *object)
249{
250 void *base;
251
252 if (!object)
253 return 1;
254
255 base = page_address(page);
256 if (object < base || object >= base + page->objects * s->size ||
257 (object - base) % s->size) {
258 return 0;
259 }
260
261 return 1;
262}
263
264
265
266
267
268
269
270
271static inline void *get_freepointer(struct kmem_cache *s, void *object)
272{
273 return *(void **)(object + s->offset);
274}
275
276static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
277{
278 *(void **)(object + s->offset) = fp;
279}
280
281
/*
 * Walk all @__objects object slots of a slab that starts at @__addr,
 * advancing @__p by the cache's object stride (__s)->size each step.
 * Note that @__addr and @__objects are evaluated on every iteration.
 */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)


/*
 * Walk a chain of free objects starting at @__free, following each
 * object's free pointer until a NULL pointer ends the chain.
 */
#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
289
290
291static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
292{
293 return (p - addr) / s->size;
294}
295
296static inline struct kmem_cache_order_objects oo_make(int order,
297 unsigned long size)
298{
299 struct kmem_cache_order_objects x = {
300 (order << OO_SHIFT) + (PAGE_SIZE << order) / size
301 };
302
303 return x;
304}
305
306static inline int oo_order(struct kmem_cache_order_objects x)
307{
308 return x.x >> OO_SHIFT;
309}
310
311static inline int oo_objects(struct kmem_cache_order_objects x)
312{
313 return x.x & OO_MASK;
314}
315
316#ifdef CONFIG_SLUB_DEBUG
317
318
319
/*
 * Debug flags that kmem_cache_flags() ORs into a cache's flags.
 * Starts as DEBUG_DEFAULT_FLAGS with CONFIG_SLUB_DEBUG_ON, otherwise 0;
 * setup_slub_debug() overrides it from the "slub_debug" boot parameter.
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

/*
 * Optional cache-name prefix taken from the part of the boot parameter
 * after ','. When set, kmem_cache_flags() applies slub_debug only to
 * caches whose name starts with this string.
 */
static char *slub_debug_slabs;
327
328
329
330
331static void print_section(char *text, u8 *addr, unsigned int length)
332{
333 int i, offset;
334 int newline = 1;
335 char ascii[17];
336
337 ascii[16] = 0;
338
339 for (i = 0; i < length; i++) {
340 if (newline) {
341 printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
342 newline = 0;
343 }
344 printk(KERN_CONT " %02x", addr[i]);
345 offset = i % 16;
346 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
347 if (offset == 15) {
348 printk(KERN_CONT " %s\n", ascii);
349 newline = 1;
350 }
351 }
352 if (!newline) {
353 i %= 16;
354 while (i < 16) {
355 printk(KERN_CONT " ");
356 ascii[i] = ' ';
357 i++;
358 }
359 printk(KERN_CONT " %s\n", ascii);
360 }
361}
362
363static struct track *get_track(struct kmem_cache *s, void *object,
364 enum track_item alloc)
365{
366 struct track *p;
367
368 if (s->offset)
369 p = object + s->offset + sizeof(void *);
370 else
371 p = object + s->inuse;
372
373 return p + alloc;
374}
375
376static void set_track(struct kmem_cache *s, void *object,
377 enum track_item alloc, unsigned long addr)
378{
379 struct track *p = get_track(s, object, alloc);
380
381 if (addr) {
382 p->addr = addr;
383 p->cpu = smp_processor_id();
384 p->pid = current->pid;
385 p->when = jiffies;
386 } else
387 memset(p, 0, sizeof(struct track));
388}
389
390static void init_tracking(struct kmem_cache *s, void *object)
391{
392 if (!(s->flags & SLAB_STORE_USER))
393 return;
394
395 set_track(s, object, TRACK_FREE, 0UL);
396 set_track(s, object, TRACK_ALLOC, 0UL);
397}
398
399static void print_track(const char *s, struct track *t)
400{
401 if (!t->addr)
402 return;
403
404 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
405 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
406}
407
408static void print_tracking(struct kmem_cache *s, void *object)
409{
410 if (!(s->flags & SLAB_STORE_USER))
411 return;
412
413 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
414 print_track("Freed", get_track(s, object, TRACK_FREE));
415}
416
417static void print_page_info(struct page *page)
418{
419 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
420 page, page->objects, page->inuse, page->freelist, page->flags);
421
422}
423
424static void slab_bug(struct kmem_cache *s, char *fmt, ...)
425{
426 va_list args;
427 char buf[100];
428
429 va_start(args, fmt);
430 vsnprintf(buf, sizeof(buf), fmt, args);
431 va_end(args);
432 printk(KERN_ERR "========================================"
433 "=====================================\n");
434 printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
435 printk(KERN_ERR "----------------------------------------"
436 "-------------------------------------\n\n");
437}
438
439static void slab_fix(struct kmem_cache *s, char *fmt, ...)
440{
441 va_list args;
442 char buf[100];
443
444 va_start(args, fmt);
445 vsnprintf(buf, sizeof(buf), fmt, args);
446 va_end(args);
447 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
448}
449
450static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
451{
452 unsigned int off;
453 u8 *addr = page_address(page);
454
455 print_tracking(s, p);
456
457 print_page_info(page);
458
459 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
460 p, p - addr, get_freepointer(s, p));
461
462 if (p > addr + 16)
463 print_section("Bytes b4", p - 16, 16);
464
465 print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
466
467 if (s->flags & SLAB_RED_ZONE)
468 print_section("Redzone", p + s->objsize,
469 s->inuse - s->objsize);
470
471 if (s->offset)
472 off = s->offset + sizeof(void *);
473 else
474 off = s->inuse;
475
476 if (s->flags & SLAB_STORE_USER)
477 off += 2 * sizeof(struct track);
478
479 if (off != s->size)
480
481 print_section("Padding", p + off, s->size - off);
482
483 dump_stack();
484}
485
486static void object_err(struct kmem_cache *s, struct page *page,
487 u8 *object, char *reason)
488{
489 slab_bug(s, "%s", reason);
490 print_trailer(s, page, object);
491}
492
/*
 * Report a problem with a whole slab: print the bug banner carrying the
 * printf-style message (truncated to fit a 100 byte buffer), then the
 * slab bookkeeping and a stack trace.
 */
static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	char msg[100];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);

	slab_bug(s, "%s", msg);
	print_page_info(page);
	dump_stack();
}
505
506static void init_object(struct kmem_cache *s, void *object, int active)
507{
508 u8 *p = object;
509
510 if (s->flags & __OBJECT_POISON) {
511 memset(p, POISON_FREE, s->objsize - 1);
512 p[s->objsize - 1] = POISON_END;
513 }
514
515 if (s->flags & SLAB_RED_ZONE)
516 memset(p + s->objsize,
517 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
518 s->inuse - s->objsize);
519}
520
521static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
522{
523 while (bytes) {
524 if (*start != (u8)value)
525 return start;
526 start++;
527 bytes--;
528 }
529 return NULL;
530}
531
532static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
533 void *from, void *to)
534{
535 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
536 memset(from, data, to - from);
537}
538
539static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
540 u8 *object, char *what,
541 u8 *start, unsigned int value, unsigned int bytes)
542{
543 u8 *fault;
544 u8 *end;
545
546 fault = check_bytes(start, value, bytes);
547 if (!fault)
548 return 1;
549
550 end = start + bytes;
551 while (end > fault && end[-1] == value)
552 end--;
553
554 slab_bug(s, "%s overwritten", what);
555 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
556 fault, end - 1, fault[0], value);
557 print_trailer(s, page, object);
558
559 restore_bytes(s, what, value, fault, end);
560 return 0;
561}
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
602{
603 unsigned long off = s->inuse;
604
605 if (s->offset)
606
607 off += sizeof(void *);
608
609 if (s->flags & SLAB_STORE_USER)
610
611 off += 2 * sizeof(struct track);
612
613 if (s->size == off)
614 return 1;
615
616 return check_bytes_and_report(s, page, p, "Object padding",
617 p + off, POISON_INUSE, s->size - off);
618}
619
620
621static int slab_pad_check(struct kmem_cache *s, struct page *page)
622{
623 u8 *start;
624 u8 *fault;
625 u8 *end;
626 int length;
627 int remainder;
628
629 if (!(s->flags & SLAB_POISON))
630 return 1;
631
632 start = page_address(page);
633 length = (PAGE_SIZE << compound_order(page));
634 end = start + length;
635 remainder = length % s->size;
636 if (!remainder)
637 return 1;
638
639 fault = check_bytes(end - remainder, POISON_INUSE, remainder);
640 if (!fault)
641 return 1;
642 while (end > fault && end[-1] == POISON_INUSE)
643 end--;
644
645 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
646 print_section("Padding", end - remainder, remainder);
647
648 restore_bytes(s, "slab padding", POISON_INUSE, start, end);
649 return 0;
650}
651
652static int check_object(struct kmem_cache *s, struct page *page,
653 void *object, int active)
654{
655 u8 *p = object;
656 u8 *endobject = object + s->objsize;
657
658 if (s->flags & SLAB_RED_ZONE) {
659 unsigned int red =
660 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;
661
662 if (!check_bytes_and_report(s, page, object, "Redzone",
663 endobject, red, s->inuse - s->objsize))
664 return 0;
665 } else {
666 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
667 check_bytes_and_report(s, page, p, "Alignment padding",
668 endobject, POISON_INUSE, s->inuse - s->objsize);
669 }
670 }
671
672 if (s->flags & SLAB_POISON) {
673 if (!active && (s->flags & __OBJECT_POISON) &&
674 (!check_bytes_and_report(s, page, p, "Poison", p,
675 POISON_FREE, s->objsize - 1) ||
676 !check_bytes_and_report(s, page, p, "Poison",
677 p + s->objsize - 1, POISON_END, 1)))
678 return 0;
679
680
681
682 check_pad_bytes(s, page, p);
683 }
684
685 if (!s->offset && active)
686
687
688
689
690 return 1;
691
692
693 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
694 object_err(s, page, p, "Freepointer corrupt");
695
696
697
698
699
700 set_freepointer(s, p, NULL);
701 return 0;
702 }
703 return 1;
704}
705
706static int check_slab(struct kmem_cache *s, struct page *page)
707{
708 int maxobj;
709
710 VM_BUG_ON(!irqs_disabled());
711
712 if (!PageSlab(page)) {
713 slab_err(s, page, "Not a valid slab page");
714 return 0;
715 }
716
717 maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
718 if (page->objects > maxobj) {
719 slab_err(s, page, "objects %u > max %u",
720 s->name, page->objects, maxobj);
721 return 0;
722 }
723 if (page->inuse > page->objects) {
724 slab_err(s, page, "inuse %u > max %u",
725 s->name, page->inuse, page->objects);
726 return 0;
727 }
728
729 slab_pad_check(s, page);
730 return 1;
731}
732
733
734
735
736
737static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
738{
739 int nr = 0;
740 void *fp = page->freelist;
741 void *object = NULL;
742 unsigned long max_objects;
743
744 while (fp && nr <= page->objects) {
745 if (fp == search)
746 return 1;
747 if (!check_valid_pointer(s, page, fp)) {
748 if (object) {
749 object_err(s, page, object,
750 "Freechain corrupt");
751 set_freepointer(s, object, NULL);
752 break;
753 } else {
754 slab_err(s, page, "Freepointer corrupt");
755 page->freelist = NULL;
756 page->inuse = page->objects;
757 slab_fix(s, "Freelist cleared");
758 return 0;
759 }
760 break;
761 }
762 object = fp;
763 fp = get_freepointer(s, object);
764 nr++;
765 }
766
767 max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
768 if (max_objects > MAX_OBJS_PER_PAGE)
769 max_objects = MAX_OBJS_PER_PAGE;
770
771 if (page->objects != max_objects) {
772 slab_err(s, page, "Wrong number of objects. Found %d but "
773 "should be %d", page->objects, max_objects);
774 page->objects = max_objects;
775 slab_fix(s, "Number of objects adjusted.");
776 }
777 if (page->inuse != page->objects - nr) {
778 slab_err(s, page, "Wrong object count. Counter is %d but "
779 "counted were %d", page->inuse, page->objects - nr);
780 page->inuse = page->objects - nr;
781 slab_fix(s, "Object count adjusted.");
782 }
783 return search == NULL;
784}
785
786static void trace(struct kmem_cache *s, struct page *page, void *object,
787 int alloc)
788{
789 if (s->flags & SLAB_TRACE) {
790 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
791 s->name,
792 alloc ? "alloc" : "free",
793 object, page->inuse,
794 page->freelist);
795
796 if (!alloc)
797 print_section("Object", (void *)object, s->objsize);
798
799 dump_stack();
800 }
801}
802
803
804
805
806static void add_full(struct kmem_cache_node *n, struct page *page)
807{
808 spin_lock(&n->list_lock);
809 list_add(&page->lru, &n->full);
810 spin_unlock(&n->list_lock);
811}
812
813static void remove_full(struct kmem_cache *s, struct page *page)
814{
815 struct kmem_cache_node *n;
816
817 if (!(s->flags & SLAB_STORE_USER))
818 return;
819
820 n = get_node(s, page_to_nid(page));
821
822 spin_lock(&n->list_lock);
823 list_del(&page->lru);
824 spin_unlock(&n->list_lock);
825}
826
827
828static inline unsigned long slabs_node(struct kmem_cache *s, int node)
829{
830 struct kmem_cache_node *n = get_node(s, node);
831
832 return atomic_long_read(&n->nr_slabs);
833}
834
835static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
836{
837 struct kmem_cache_node *n = get_node(s, node);
838
839
840
841
842
843
844
845 if (!NUMA_BUILD || n) {
846 atomic_long_inc(&n->nr_slabs);
847 atomic_long_add(objects, &n->total_objects);
848 }
849}
850static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
851{
852 struct kmem_cache_node *n = get_node(s, node);
853
854 atomic_long_dec(&n->nr_slabs);
855 atomic_long_sub(objects, &n->total_objects);
856}
857
858
859static void setup_object_debug(struct kmem_cache *s, struct page *page,
860 void *object)
861{
862 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
863 return;
864
865 init_object(s, object, 0);
866 init_tracking(s, object);
867}
868
869static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
870 void *object, unsigned long addr)
871{
872 if (!check_slab(s, page))
873 goto bad;
874
875 if (!on_freelist(s, page, object)) {
876 object_err(s, page, object, "Object already allocated");
877 goto bad;
878 }
879
880 if (!check_valid_pointer(s, page, object)) {
881 object_err(s, page, object, "Freelist Pointer check fails");
882 goto bad;
883 }
884
885 if (!check_object(s, page, object, 0))
886 goto bad;
887
888
889 if (s->flags & SLAB_STORE_USER)
890 set_track(s, object, TRACK_ALLOC, addr);
891 trace(s, page, object, 1);
892 init_object(s, object, 1);
893 return 1;
894
895bad:
896 if (PageSlab(page)) {
897
898
899
900
901
902 slab_fix(s, "Marking all objects used");
903 page->inuse = page->objects;
904 page->freelist = NULL;
905 }
906 return 0;
907}
908
909static int free_debug_processing(struct kmem_cache *s, struct page *page,
910 void *object, unsigned long addr)
911{
912 if (!check_slab(s, page))
913 goto fail;
914
915 if (!check_valid_pointer(s, page, object)) {
916 slab_err(s, page, "Invalid object pointer 0x%p", object);
917 goto fail;
918 }
919
920 if (on_freelist(s, page, object)) {
921 object_err(s, page, object, "Object already free");
922 goto fail;
923 }
924
925 if (!check_object(s, page, object, 1))
926 return 0;
927
928 if (unlikely(s != page->slab)) {
929 if (!PageSlab(page)) {
930 slab_err(s, page, "Attempt to free object(0x%p) "
931 "outside of slab", object);
932 } else if (!page->slab) {
933 printk(KERN_ERR
934 "SLUB <none>: no slab for object 0x%p.\n",
935 object);
936 dump_stack();
937 } else
938 object_err(s, page, object,
939 "page slab pointer corrupt.");
940 goto fail;
941 }
942
943
944 if (!PageSlubFrozen(page) && !page->freelist)
945 remove_full(s, page);
946 if (s->flags & SLAB_STORE_USER)
947 set_track(s, object, TRACK_FREE, addr);
948 trace(s, page, object, 0);
949 init_object(s, object, 0);
950 return 1;
951
952fail:
953 slab_fix(s, "Object at 0x%p not freed", object);
954 return 0;
955}
956
957static int __init setup_slub_debug(char *str)
958{
959 slub_debug = DEBUG_DEFAULT_FLAGS;
960 if (*str++ != '=' || !*str)
961
962
963
964 goto out;
965
966 if (*str == ',')
967
968
969
970
971 goto check_slabs;
972
973 slub_debug = 0;
974 if (*str == '-')
975
976
977
978 goto out;
979
980
981
982
983 for (; *str && *str != ','; str++) {
984 switch (tolower(*str)) {
985 case 'f':
986 slub_debug |= SLAB_DEBUG_FREE;
987 break;
988 case 'z':
989 slub_debug |= SLAB_RED_ZONE;
990 break;
991 case 'p':
992 slub_debug |= SLAB_POISON;
993 break;
994 case 'u':
995 slub_debug |= SLAB_STORE_USER;
996 break;
997 case 't':
998 slub_debug |= SLAB_TRACE;
999 break;
1000 default:
1001 printk(KERN_ERR "slub_debug option '%c' "
1002 "unknown. skipped\n", *str);
1003 }
1004 }
1005
1006check_slabs:
1007 if (*str == ',')
1008 slub_debug_slabs = str + 1;
1009out:
1010 return 1;
1011}
1012
1013__setup("slub_debug", setup_slub_debug);
1014
1015static unsigned long kmem_cache_flags(unsigned long objsize,
1016 unsigned long flags, const char *name,
1017 void (*ctor)(void *))
1018{
1019
1020
1021
1022 if (slub_debug && (!slub_debug_slabs ||
1023 strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
1024 flags |= slub_debug;
1025
1026 return flags;
1027}
1028#else
/*
 * Stand-ins used when CONFIG_SLUB_DEBUG is off: checks report success,
 * debug processing reports 0, accounting does nothing and no debug flags
 * are ever added.
 */
static inline void setup_object_debug(struct kmem_cache *s,
			struct page *page, void *object)
{
}

static inline int alloc_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr)
{
	return 0;
}

static inline int free_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr)
{
	return 0;
}

static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
{
	return 1;
}

static inline int check_object(struct kmem_cache *s, struct page *page,
			void *object, int active)
{
	return 1;
}

static inline void add_full(struct kmem_cache_node *n, struct page *page)
{
}

static inline unsigned long kmem_cache_flags(unsigned long objsize,
	unsigned long flags, const char *name,
	void (*ctor)(void *))
{
	return flags;
}

/* Lets "if (slub_debug)" style tests compile away in this configuration. */
#define slub_debug 0

static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{
	return 0;
}

static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects)
{
}

static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects)
{
}
1057#endif
1058
1059
1060
1061
1062static inline struct page *alloc_slab_page(gfp_t flags, int node,
1063 struct kmem_cache_order_objects oo)
1064{
1065 int order = oo_order(oo);
1066
1067 if (node == -1)
1068 return alloc_pages(flags, order);
1069 else
1070 return alloc_pages_node(node, flags, order);
1071}
1072
1073static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1074{
1075 struct page *page;
1076 struct kmem_cache_order_objects oo = s->oo;
1077
1078 flags |= s->allocflags;
1079
1080 page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
1081 oo);
1082 if (unlikely(!page)) {
1083 oo = s->min;
1084
1085
1086
1087
1088 page = alloc_slab_page(flags, node, oo);
1089 if (!page)
1090 return NULL;
1091
1092 stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
1093 }
1094 page->objects = oo_objects(oo);
1095 mod_zone_page_state(page_zone(page),
1096 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1097 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1098 1 << oo_order(oo));
1099
1100 return page;
1101}
1102
1103static void setup_object(struct kmem_cache *s, struct page *page,
1104 void *object)
1105{
1106 setup_object_debug(s, page, object);
1107 if (unlikely(s->ctor))
1108 s->ctor(object);
1109}
1110
1111static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1112{
1113 struct page *page;
1114 void *start;
1115 void *last;
1116 void *p;
1117
1118 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1119
1120 page = allocate_slab(s,
1121 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1122 if (!page)
1123 goto out;
1124
1125 inc_slabs_node(s, page_to_nid(page), page->objects);
1126 page->slab = s;
1127 page->flags |= 1 << PG_slab;
1128 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
1129 SLAB_STORE_USER | SLAB_TRACE))
1130 __SetPageSlubDebug(page);
1131
1132 start = page_address(page);
1133
1134 if (unlikely(s->flags & SLAB_POISON))
1135 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1136
1137 last = start;
1138 for_each_object(p, s, start, page->objects) {
1139 setup_object(s, page, last);
1140 set_freepointer(s, last, p);
1141 last = p;
1142 }
1143 setup_object(s, page, last);
1144 set_freepointer(s, last, NULL);
1145
1146 page->freelist = start;
1147 page->inuse = 0;
1148out:
1149 return page;
1150}
1151
1152static void __free_slab(struct kmem_cache *s, struct page *page)
1153{
1154 int order = compound_order(page);
1155 int pages = 1 << order;
1156
1157 if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
1158 void *p;
1159
1160 slab_pad_check(s, page);
1161 for_each_object(p, s, page_address(page),
1162 page->objects)
1163 check_object(s, page, p, 0);
1164 __ClearPageSlubDebug(page);
1165 }
1166
1167 mod_zone_page_state(page_zone(page),
1168 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1169 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1170 -pages);
1171
1172 __ClearPageSlab(page);
1173 reset_page_mapcount(page);
1174 if (current->reclaim_state)
1175 current->reclaim_state->reclaimed_slab += pages;
1176 __free_pages(page, order);
1177}
1178
1179static void rcu_free_slab(struct rcu_head *h)
1180{
1181 struct page *page;
1182
1183 page = container_of((struct list_head *)h, struct page, lru);
1184 __free_slab(page->slab, page);
1185}
1186
1187static void free_slab(struct kmem_cache *s, struct page *page)
1188{
1189 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1190
1191
1192
1193 struct rcu_head *head = (void *)&page->lru;
1194
1195 call_rcu(head, rcu_free_slab);
1196 } else
1197 __free_slab(s, page);
1198}
1199
1200static void discard_slab(struct kmem_cache *s, struct page *page)
1201{
1202 dec_slabs_node(s, page_to_nid(page), page->objects);
1203 free_slab(s, page);
1204}
1205
1206
1207
1208
1209static __always_inline void slab_lock(struct page *page)
1210{
1211 bit_spin_lock(PG_locked, &page->flags);
1212}
1213
1214static __always_inline void slab_unlock(struct page *page)
1215{
1216 __bit_spin_unlock(PG_locked, &page->flags);
1217}
1218
1219static __always_inline int slab_trylock(struct page *page)
1220{
1221 int rc = 1;
1222
1223 rc = bit_spin_trylock(PG_locked, &page->flags);
1224 return rc;
1225}
1226
1227
1228
1229
1230static void add_partial(struct kmem_cache_node *n,
1231 struct page *page, int tail)
1232{
1233 spin_lock(&n->list_lock);
1234 n->nr_partial++;
1235 if (tail)
1236 list_add_tail(&page->lru, &n->partial);
1237 else
1238 list_add(&page->lru, &n->partial);
1239 spin_unlock(&n->list_lock);
1240}
1241
1242static void remove_partial(struct kmem_cache *s, struct page *page)
1243{
1244 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1245
1246 spin_lock(&n->list_lock);
1247 list_del(&page->lru);
1248 n->nr_partial--;
1249 spin_unlock(&n->list_lock);
1250}
1251
1252
1253
1254
1255
1256
1257static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
1258 struct page *page)
1259{
1260 if (slab_trylock(page)) {
1261 list_del(&page->lru);
1262 n->nr_partial--;
1263 __SetPageSlubFrozen(page);
1264 return 1;
1265 }
1266 return 0;
1267}
1268
1269
1270
1271
1272static struct page *get_partial_node(struct kmem_cache_node *n)
1273{
1274 struct page *page;
1275
1276
1277
1278
1279
1280
1281
1282 if (!n || !n->nr_partial)
1283 return NULL;
1284
1285 spin_lock(&n->list_lock);
1286 list_for_each_entry(page, &n->partial, lru)
1287 if (lock_and_freeze_slab(n, page))
1288 goto out;
1289 page = NULL;
1290out:
1291 spin_unlock(&n->list_lock);
1292 return page;
1293}
1294
1295
1296
1297
1298static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1299{
1300#ifdef CONFIG_NUMA
1301 struct zonelist *zonelist;
1302 struct zoneref *z;
1303 struct zone *zone;
1304 enum zone_type high_zoneidx = gfp_zone(flags);
1305 struct page *page;
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325 if (!s->remote_node_defrag_ratio ||
1326 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1327 return NULL;
1328
1329 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1330 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1331 struct kmem_cache_node *n;
1332
1333 n = get_node(s, zone_to_nid(zone));
1334
1335 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1336 n->nr_partial > s->min_partial) {
1337 page = get_partial_node(n);
1338 if (page)
1339 return page;
1340 }
1341 }
1342#endif
1343 return NULL;
1344}
1345
1346
1347
1348
1349static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1350{
1351 struct page *page;
1352 int searchnode = (node == -1) ? numa_node_id() : node;
1353
1354 page = get_partial_node(get_node(s, searchnode));
1355 if (page || (flags & __GFP_THISNODE))
1356 return page;
1357
1358 return get_any_partial(s, flags);
1359}
1360
1361
1362
1363
1364
1365
1366
1367
1368static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1369{
1370 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1371 struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
1372
1373 __ClearPageSlubFrozen(page);
1374 if (page->inuse) {
1375
1376 if (page->freelist) {
1377 add_partial(n, page, tail);
1378 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1379 } else {
1380 stat(c, DEACTIVATE_FULL);
1381 if (SLABDEBUG && PageSlubDebug(page) &&
1382 (s->flags & SLAB_STORE_USER))
1383 add_full(n, page);
1384 }
1385 slab_unlock(page);
1386 } else {
1387 stat(c, DEACTIVATE_EMPTY);
1388 if (n->nr_partial < s->min_partial) {
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399 add_partial(n, page, 1);
1400 slab_unlock(page);
1401 } else {
1402 slab_unlock(page);
1403 stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
1404 discard_slab(s, page);
1405 }
1406 }
1407}
1408
1409
1410
1411
1412static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1413{
1414 struct page *page = c->page;
1415 int tail = 1;
1416
1417 if (page->freelist)
1418 stat(c, DEACTIVATE_REMOTE_FREES);
1419
1420
1421
1422
1423
1424 while (unlikely(c->freelist)) {
1425 void **object;
1426
1427 tail = 0;
1428
1429
1430 object = c->freelist;
1431 c->freelist = c->freelist[c->offset];
1432
1433
1434 object[c->offset] = page->freelist;
1435 page->freelist = object;
1436 page->inuse--;
1437 }
1438 c->page = NULL;
1439 unfreeze_slab(s, page, tail);
1440}
1441
1442static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1443{
1444 stat(c, CPUSLAB_FLUSH);
1445 slab_lock(c->page);
1446 deactivate_slab(s, c);
1447}
1448
1449
1450
1451
1452
1453
1454static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1455{
1456 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1457
1458 if (likely(c && c->page))
1459 flush_slab(s, c);
1460}
1461
/*
 * on_each_cpu() callback used by flush_all(): @d is the cache whose slab
 * on the executing CPU is to be flushed.
 */
static void flush_cpu_slab(void *d)
{
	__flush_cpu_slab((struct kmem_cache *)d, smp_processor_id());
}
1468
1469static void flush_all(struct kmem_cache *s)
1470{
1471 on_each_cpu(flush_cpu_slab, s, 1);
1472}
1473
1474
1475
1476
1477
/*
 * Check whether the per-cpu slab of @c can serve a request for @node.
 * A @node of -1 matches anything; without CONFIG_NUMA every request
 * matches.
 */
static inline int node_match(struct kmem_cache_cpu *c, int node)
{
#ifdef CONFIG_NUMA
	return node == -1 || c->node == node;
#else
	return 1;
#endif
}
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
/*
 * Slow path of slab_alloc(), entered when the per-cpu freelist is empty
 * or the per-cpu slab does not match the requested node. slab_alloc()
 * calls this with interrupts disabled.
 *
 * In order, the function tries to:
 *  1. refill the per-cpu freelist from the freelist of the current
 *     per-cpu slab,
 *  2. otherwise deactivate that slab and pick up a partial slab,
 *  3. otherwise allocate a brand new slab.
 *
 * Debug slabs never get a per-cpu freelist here: one object is taken
 * through alloc_debug_processing() per call.
 *
 * Returns the allocated object, or NULL if no new slab could be
 * obtained.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void **object;
	struct page *new;

	/* Zeroing is done by slab_alloc(); don't make the page allocator do it. */
	gfpflags &= ~__GFP_ZERO;

	if (!c->page)
		goto new_slab;

	slab_lock(c->page);
	if (unlikely(!node_match(c, node)))
		goto another_slab;

	stat(c, ALLOC_REFILL);

load_freelist:
	/* c->page is locked here on every path that reaches this label. */
	object = c->page->freelist;
	if (unlikely(!object))
		goto another_slab;
	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
		goto debug;

	/*
	 * Hand out the first object and move the rest of the page's
	 * freelist to the per-cpu freelist; the page then counts as
	 * fully in use.
	 */
	c->freelist = object[c->offset];
	c->page->inuse = c->page->objects;
	c->page->freelist = NULL;
	c->node = page_to_nid(c->page);
unlock_out:
	slab_unlock(c->page);
	stat(c, ALLOC_SLOWPATH);
	return object;

another_slab:
	/* deactivate_slab() also unlocks the page via unfreeze_slab(). */
	deactivate_slab(s, c);

new_slab:
	/* A page returned by get_partial() is already locked and frozen. */
	new = get_partial(s, gfpflags, node);
	if (new) {
		c->page = new;
		stat(c, ALLOC_FROM_PARTIAL);
		goto load_freelist;
	}

	/* Interrupts are only re-enabled for allocations that may wait. */
	if (gfpflags & __GFP_WAIT)
		local_irq_enable();

	new = new_slab(s, gfpflags, node);

	if (gfpflags & __GFP_WAIT)
		local_irq_disable();

	if (new) {
		/*
		 * Re-read the per-cpu structure and flush any slab it holds.
		 * NOTE(review): presumably needed because the task may have
		 * changed CPU, or another allocation may have installed a
		 * slab, while interrupts were enabled - confirm.
		 */
		c = get_cpu_slab(s, smp_processor_id());
		stat(c, ALLOC_SLAB);
		if (c->page)
			flush_slab(s, c);
		slab_lock(new);
		__SetPageSlubFrozen(new);
		c->page = new;
		goto load_freelist;
	}
	return NULL;
debug:
	/* A failed check abandons this slab and tries another one. */
	if (!alloc_debug_processing(s, c->page, object, addr))
		goto another_slab;

	/*
	 * Take just this one object; the remaining free objects stay on
	 * the page's freelist and c->freelist is left untouched.
	 * NOTE(review): c->node = -1 presumably keeps node_match() failing
	 * for node-specific requests on debug slabs - confirm.
	 */
	c->page->inuse++;
	c->page->freelist = object[c->offset];
	c->node = -1;
	goto unlock_out;
}
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589static __always_inline void *slab_alloc(struct kmem_cache *s,
1590 gfp_t gfpflags, int node, unsigned long addr)
1591{
1592 void **object;
1593 struct kmem_cache_cpu *c;
1594 unsigned long flags;
1595 unsigned int objsize;
1596
1597 lockdep_trace_alloc(gfpflags);
1598 might_sleep_if(gfpflags & __GFP_WAIT);
1599
1600 if (should_failslab(s->objsize, gfpflags))
1601 return NULL;
1602
1603 local_irq_save(flags);
1604 c = get_cpu_slab(s, smp_processor_id());
1605 objsize = c->objsize;
1606 if (unlikely(!c->freelist || !node_match(c, node)))
1607
1608 object = __slab_alloc(s, gfpflags, node, addr, c);
1609
1610 else {
1611 object = c->freelist;
1612 c->freelist = object[c->offset];
1613 stat(c, ALLOC_FASTPATH);
1614 }
1615 local_irq_restore(flags);
1616
1617 if (unlikely((gfpflags & __GFP_ZERO) && object))
1618 memset(object, 0, objsize);
1619
1620 return object;
1621}
1622
1623void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1624{
1625 void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
1626
1627 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
1628
1629 return ret;
1630}
1631EXPORT_SYMBOL(kmem_cache_alloc);
1632
1633#ifdef CONFIG_KMEMTRACE
1634void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1635{
1636 return slab_alloc(s, gfpflags, -1, _RET_IP_);
1637}
1638EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1639#endif
1640
1641#ifdef CONFIG_NUMA
1642void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1643{
1644 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
1645
1646 trace_kmem_cache_alloc_node(_RET_IP_, ret,
1647 s->objsize, s->size, gfpflags, node);
1648
1649 return ret;
1650}
1651EXPORT_SYMBOL(kmem_cache_alloc_node);
1652#endif
1653
1654#ifdef CONFIG_KMEMTRACE
1655void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
1656 gfp_t gfpflags,
1657 int node)
1658{
1659 return slab_alloc(s, gfpflags, node, _RET_IP_);
1660}
1661EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
1662#endif
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672static void __slab_free(struct kmem_cache *s, struct page *page,
1673 void *x, unsigned long addr, unsigned int offset)
1674{
1675 void *prior;
1676 void **object = (void *)x;
1677 struct kmem_cache_cpu *c;
1678
1679 c = get_cpu_slab(s, raw_smp_processor_id());
1680 stat(c, FREE_SLOWPATH);
1681 slab_lock(page);
1682
1683 if (unlikely(SLABDEBUG && PageSlubDebug(page)))
1684 goto debug;
1685
1686checks_ok:
1687 prior = object[offset] = page->freelist;
1688 page->freelist = object;
1689 page->inuse--;
1690
1691 if (unlikely(PageSlubFrozen(page))) {
1692 stat(c, FREE_FROZEN);
1693 goto out_unlock;
1694 }
1695
1696 if (unlikely(!page->inuse))
1697 goto slab_empty;
1698
1699
1700
1701
1702
1703 if (unlikely(!prior)) {
1704 add_partial(get_node(s, page_to_nid(page)), page, 1);
1705 stat(c, FREE_ADD_PARTIAL);
1706 }
1707
1708out_unlock:
1709 slab_unlock(page);
1710 return;
1711
1712slab_empty:
1713 if (prior) {
1714
1715
1716
1717 remove_partial(s, page);
1718 stat(c, FREE_REMOVE_PARTIAL);
1719 }
1720 slab_unlock(page);
1721 stat(c, FREE_SLAB);
1722 discard_slab(s, page);
1723 return;
1724
1725debug:
1726 if (!free_debug_processing(s, page, x, addr))
1727 goto out_unlock;
1728 goto checks_ok;
1729}
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742static __always_inline void slab_free(struct kmem_cache *s,
1743 struct page *page, void *x, unsigned long addr)
1744{
1745 void **object = (void *)x;
1746 struct kmem_cache_cpu *c;
1747 unsigned long flags;
1748
1749 local_irq_save(flags);
1750 c = get_cpu_slab(s, smp_processor_id());
1751 debug_check_no_locks_freed(object, c->objsize);
1752 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1753 debug_check_no_obj_freed(object, c->objsize);
1754 if (likely(page == c->page && c->node >= 0)) {
1755 object[c->offset] = c->freelist;
1756 c->freelist = object;
1757 stat(c, FREE_FASTPATH);
1758 } else
1759 __slab_free(s, page, x, addr, c->offset);
1760
1761 local_irq_restore(flags);
1762}
1763
1764void kmem_cache_free(struct kmem_cache *s, void *x)
1765{
1766 struct page *page;
1767
1768 page = virt_to_head_page(x);
1769
1770 slab_free(s, page, x, _RET_IP_);
1771
1772 trace_kmem_cache_free(_RET_IP_, x);
1773}
1774EXPORT_SYMBOL(kmem_cache_free);
1775
1776
1777static struct page *get_object_page(const void *x)
1778{
1779 struct page *page = virt_to_head_page(x);
1780
1781 if (!PageSlab(page))
1782 return NULL;
1783
1784 return page;
1785}
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806static int slub_min_order;
1807static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
1808static int slub_min_objects;
1809
1810
1811
1812
1813
1814static int slub_nomerge;
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841static inline int slab_order(int size, int min_objects,
1842 int max_order, int fract_leftover)
1843{
1844 int order;
1845 int rem;
1846 int min_order = slub_min_order;
1847
1848 if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
1849 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
1850
1851 for (order = max(min_order,
1852 fls(min_objects * size - 1) - PAGE_SHIFT);
1853 order <= max_order; order++) {
1854
1855 unsigned long slab_size = PAGE_SIZE << order;
1856
1857 if (slab_size < min_objects * size)
1858 continue;
1859
1860 rem = slab_size % size;
1861
1862 if (rem <= slab_size / fract_leftover)
1863 break;
1864
1865 }
1866
1867 return order;
1868}
1869
1870static inline int calculate_order(int size)
1871{
1872 int order;
1873 int min_objects;
1874 int fraction;
1875 int max_objects;
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885 min_objects = slub_min_objects;
1886 if (!min_objects)
1887 min_objects = 4 * (fls(nr_cpu_ids) + 1);
1888 max_objects = (PAGE_SIZE << slub_max_order)/size;
1889 min_objects = min(min_objects, max_objects);
1890
1891 while (min_objects > 1) {
1892 fraction = 16;
1893 while (fraction >= 4) {
1894 order = slab_order(size, min_objects,
1895 slub_max_order, fraction);
1896 if (order <= slub_max_order)
1897 return order;
1898 fraction /= 2;
1899 }
1900 min_objects --;
1901 }
1902
1903
1904
1905
1906
1907 order = slab_order(size, 1, slub_max_order, 1);
1908 if (order <= slub_max_order)
1909 return order;
1910
1911
1912
1913
1914 order = slab_order(size, 1, MAX_ORDER, 1);
1915 if (order < MAX_ORDER)
1916 return order;
1917 return -ENOSYS;
1918}
1919
1920
1921
1922
1923static unsigned long calculate_alignment(unsigned long flags,
1924 unsigned long align, unsigned long size)
1925{
1926
1927
1928
1929
1930
1931
1932
1933 if (flags & SLAB_HWCACHE_ALIGN) {
1934 unsigned long ralign = cache_line_size();
1935 while (size <= ralign / 2)
1936 ralign /= 2;
1937 align = max(align, ralign);
1938 }
1939
1940 if (align < ARCH_SLAB_MINALIGN)
1941 align = ARCH_SLAB_MINALIGN;
1942
1943 return ALIGN(align, sizeof(void *));
1944}
1945
1946static void init_kmem_cache_cpu(struct kmem_cache *s,
1947 struct kmem_cache_cpu *c)
1948{
1949 c->page = NULL;
1950 c->freelist = NULL;
1951 c->node = 0;
1952 c->offset = s->offset / sizeof(void *);
1953 c->objsize = s->objsize;
1954#ifdef CONFIG_SLUB_STATS
1955 memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
1956#endif
1957}
1958
1959static void
1960init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
1961{
1962 n->nr_partial = 0;
1963 spin_lock_init(&n->list_lock);
1964 INIT_LIST_HEAD(&n->partial);
1965#ifdef CONFIG_SLUB_DEBUG
1966 atomic_long_set(&n->nr_slabs, 0);
1967 atomic_long_set(&n->total_objects, 0);
1968 INIT_LIST_HEAD(&n->full);
1969#endif
1970}
1971
1972#ifdef CONFIG_SMP
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988#define NR_KMEM_CACHE_CPU 100
1989
1990static DEFINE_PER_CPU(struct kmem_cache_cpu,
1991 kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
1992
1993static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
1994static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
1995
1996static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
1997 int cpu, gfp_t flags)
1998{
1999 struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
2000
2001 if (c)
2002 per_cpu(kmem_cache_cpu_free, cpu) =
2003 (void *)c->freelist;
2004 else {
2005
2006 c = kmalloc_node(
2007 ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
2008 flags, cpu_to_node(cpu));
2009 if (!c)
2010 return NULL;
2011 }
2012
2013 init_kmem_cache_cpu(s, c);
2014 return c;
2015}
2016
2017static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
2018{
2019 if (c < per_cpu(kmem_cache_cpu, cpu) ||
2020 c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
2021 kfree(c);
2022 return;
2023 }
2024 c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
2025 per_cpu(kmem_cache_cpu_free, cpu) = c;
2026}
2027
2028static void free_kmem_cache_cpus(struct kmem_cache *s)
2029{
2030 int cpu;
2031
2032 for_each_online_cpu(cpu) {
2033 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
2034
2035 if (c) {
2036 s->cpu_slab[cpu] = NULL;
2037 free_kmem_cache_cpu(c, cpu);
2038 }
2039 }
2040}
2041
2042static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2043{
2044 int cpu;
2045
2046 for_each_online_cpu(cpu) {
2047 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
2048
2049 if (c)
2050 continue;
2051
2052 c = alloc_kmem_cache_cpu(s, cpu, flags);
2053 if (!c) {
2054 free_kmem_cache_cpus(s);
2055 return 0;
2056 }
2057 s->cpu_slab[cpu] = c;
2058 }
2059 return 1;
2060}
2061
2062
2063
2064
2065static void init_alloc_cpu_cpu(int cpu)
2066{
2067 int i;
2068
2069 if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
2070 return;
2071
2072 for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
2073 free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
2074
2075 cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
2076}
2077
2078static void __init init_alloc_cpu(void)
2079{
2080 int cpu;
2081
2082 for_each_online_cpu(cpu)
2083 init_alloc_cpu_cpu(cpu);
2084 }
2085
2086#else
/* !CONFIG_SMP: nothing to release. */
static inline void free_kmem_cache_cpus(struct kmem_cache *s)
{
}

/* !CONFIG_SMP: there is no per-cpu pool to set up. */
static inline void init_alloc_cpu(void)
{
}
2089
2090static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2091{
2092 init_kmem_cache_cpu(s, &s->cpu_slab);
2093 return 1;
2094}
2095#endif
2096
2097#ifdef CONFIG_NUMA
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
2108{
2109 struct page *page;
2110 struct kmem_cache_node *n;
2111 unsigned long flags;
2112
2113 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
2114
2115 page = new_slab(kmalloc_caches, gfpflags, node);
2116
2117 BUG_ON(!page);
2118 if (page_to_nid(page) != node) {
2119 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2120 "node %d\n", node);
2121 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2122 "in order to be able to continue\n");
2123 }
2124
2125 n = page->freelist;
2126 BUG_ON(!n);
2127 page->freelist = get_freepointer(kmalloc_caches, n);
2128 page->inuse++;
2129 kmalloc_caches->node[node] = n;
2130#ifdef CONFIG_SLUB_DEBUG
2131 init_object(kmalloc_caches, n, 1);
2132 init_tracking(kmalloc_caches, n);
2133#endif
2134 init_kmem_cache_node(n, kmalloc_caches);
2135 inc_slabs_node(kmalloc_caches, node, page->objects);
2136
2137
2138
2139
2140
2141
2142 local_irq_save(flags);
2143 add_partial(n, page, 0);
2144 local_irq_restore(flags);
2145}
2146
2147static void free_kmem_cache_nodes(struct kmem_cache *s)
2148{
2149 int node;
2150
2151 for_each_node_state(node, N_NORMAL_MEMORY) {
2152 struct kmem_cache_node *n = s->node[node];
2153 if (n && n != &s->local_node)
2154 kmem_cache_free(kmalloc_caches, n);
2155 s->node[node] = NULL;
2156 }
2157}
2158
2159static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2160{
2161 int node;
2162 int local_node;
2163
2164 if (slab_state >= UP)
2165 local_node = page_to_nid(virt_to_page(s));
2166 else
2167 local_node = 0;
2168
2169 for_each_node_state(node, N_NORMAL_MEMORY) {
2170 struct kmem_cache_node *n;
2171
2172 if (local_node == node)
2173 n = &s->local_node;
2174 else {
2175 if (slab_state == DOWN) {
2176 early_kmem_cache_node_alloc(gfpflags, node);
2177 continue;
2178 }
2179 n = kmem_cache_alloc_node(kmalloc_caches,
2180 gfpflags, node);
2181
2182 if (!n) {
2183 free_kmem_cache_nodes(s);
2184 return 0;
2185 }
2186
2187 }
2188 s->node[node] = n;
2189 init_kmem_cache_node(n, s);
2190 }
2191 return 1;
2192}
2193#else
/* !CONFIG_NUMA: the only node structure is embedded; nothing to free. */
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
	/* intentionally empty */
}
2197
2198static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2199{
2200 init_kmem_cache_node(&s->local_node, s);
2201 return 1;
2202}
2203#endif
2204
2205static void set_min_partial(struct kmem_cache *s, unsigned long min)
2206{
2207 if (min < MIN_PARTIAL)
2208 min = MIN_PARTIAL;
2209 else if (min > MAX_PARTIAL)
2210 min = MAX_PARTIAL;
2211 s->min_partial = min;
2212}
2213
2214
2215
2216
2217
2218static int calculate_sizes(struct kmem_cache *s, int forced_order)
2219{
2220 unsigned long flags = s->flags;
2221 unsigned long size = s->objsize;
2222 unsigned long align = s->align;
2223 int order;
2224
2225
2226
2227
2228
2229
2230 size = ALIGN(size, sizeof(void *));
2231
2232#ifdef CONFIG_SLUB_DEBUG
2233
2234
2235
2236
2237
2238 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2239 !s->ctor)
2240 s->flags |= __OBJECT_POISON;
2241 else
2242 s->flags &= ~__OBJECT_POISON;
2243
2244
2245
2246
2247
2248
2249
2250 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2251 size += sizeof(void *);
2252#endif
2253
2254
2255
2256
2257
2258 s->inuse = size;
2259
2260 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2261 s->ctor)) {
2262
2263
2264
2265
2266
2267
2268
2269
2270 s->offset = size;
2271 size += sizeof(void *);
2272 }
2273
2274#ifdef CONFIG_SLUB_DEBUG
2275 if (flags & SLAB_STORE_USER)
2276
2277
2278
2279
2280 size += 2 * sizeof(struct track);
2281
2282 if (flags & SLAB_RED_ZONE)
2283
2284
2285
2286
2287
2288
2289
2290 size += sizeof(void *);
2291#endif
2292
2293
2294
2295
2296
2297
2298 align = calculate_alignment(flags, align, s->objsize);
2299
2300
2301
2302
2303
2304
2305 size = ALIGN(size, align);
2306 s->size = size;
2307 if (forced_order >= 0)
2308 order = forced_order;
2309 else
2310 order = calculate_order(size);
2311
2312 if (order < 0)
2313 return 0;
2314
2315 s->allocflags = 0;
2316 if (order)
2317 s->allocflags |= __GFP_COMP;
2318
2319 if (s->flags & SLAB_CACHE_DMA)
2320 s->allocflags |= SLUB_DMA;
2321
2322 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2323 s->allocflags |= __GFP_RECLAIMABLE;
2324
2325
2326
2327
2328 s->oo = oo_make(order, size);
2329 s->min = oo_make(get_order(size), size);
2330 if (oo_objects(s->oo) > oo_objects(s->max))
2331 s->max = s->oo;
2332
2333 return !!oo_objects(s->oo);
2334
2335}
2336
2337static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2338 const char *name, size_t size,
2339 size_t align, unsigned long flags,
2340 void (*ctor)(void *))
2341{
2342 memset(s, 0, kmem_size);
2343 s->name = name;
2344 s->ctor = ctor;
2345 s->objsize = size;
2346 s->align = align;
2347 s->flags = kmem_cache_flags(size, flags, name, ctor);
2348
2349 if (!calculate_sizes(s, -1))
2350 goto error;
2351
2352
2353
2354
2355
2356 set_min_partial(s, ilog2(s->size));
2357 s->refcount = 1;
2358#ifdef CONFIG_NUMA
2359 s->remote_node_defrag_ratio = 1000;
2360#endif
2361 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
2362 goto error;
2363
2364 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
2365 return 1;
2366 free_kmem_cache_nodes(s);
2367error:
2368 if (flags & SLAB_PANIC)
2369 panic("Cannot create slab %s size=%lu realsize=%u "
2370 "order=%u offset=%u flags=%lx\n",
2371 s->name, (unsigned long)size, s->size, oo_order(s->oo),
2372 s->offset, flags);
2373 return 0;
2374}
2375
2376
2377
2378
2379int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2380{
2381 struct page *page;
2382
2383 page = get_object_page(object);
2384
2385 if (!page || s != page->slab)
2386
2387 return 0;
2388
2389 if (!check_valid_pointer(s, page, object))
2390 return 0;
2391
2392
2393
2394
2395
2396
2397
2398 return 1;
2399}
2400EXPORT_SYMBOL(kmem_ptr_validate);
2401
2402
2403
2404
2405unsigned int kmem_cache_size(struct kmem_cache *s)
2406{
2407 return s->objsize;
2408}
2409EXPORT_SYMBOL(kmem_cache_size);
2410
2411const char *kmem_cache_name(struct kmem_cache *s)
2412{
2413 return s->name;
2414}
2415EXPORT_SYMBOL(kmem_cache_name);
2416
/*
 * Report slab @page through slab_err() with message @text, then print
 * every object of the page that is not on its freelist together with its
 * tracking information. Does nothing without CONFIG_SLUB_DEBUG.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *addr = page_address(page);
	void *p;
	DECLARE_BITMAP(map, page->objects);

	bitmap_zero(map, page->objects);
	slab_err(s, page, "%s", text);
	slab_lock(page);

	/* Mark every free object in the bitmap ... */
	for_each_free_object(p, s, page->freelist)
		set_bit(slab_index(p, s, addr), map);

	/* ... so that unmarked objects are the ones still in use. */
	for_each_object(p, s, addr, page->objects) {
		if (test_bit(slab_index(p, s, addr), map))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
						p, p - addr);
		print_tracking(s, p);
	}
	slab_unlock(page);
#endif
}
2442
2443
2444
2445
2446static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2447{
2448 unsigned long flags;
2449 struct page *page, *h;
2450
2451 spin_lock_irqsave(&n->list_lock, flags);
2452 list_for_each_entry_safe(page, h, &n->partial, lru) {
2453 if (!page->inuse) {
2454 list_del(&page->lru);
2455 discard_slab(s, page);
2456 n->nr_partial--;
2457 } else {
2458 list_slab_objects(s, page,
2459 "Objects remaining on kmem_cache_close()");
2460 }
2461 }
2462 spin_unlock_irqrestore(&n->list_lock, flags);
2463}
2464
2465
2466
2467
2468static inline int kmem_cache_close(struct kmem_cache *s)
2469{
2470 int node;
2471
2472 flush_all(s);
2473
2474
2475 free_kmem_cache_cpus(s);
2476 for_each_node_state(node, N_NORMAL_MEMORY) {
2477 struct kmem_cache_node *n = get_node(s, node);
2478
2479 free_partial(s, n);
2480 if (n->nr_partial || slabs_node(s, node))
2481 return 1;
2482 }
2483 free_kmem_cache_nodes(s);
2484 return 0;
2485}
2486
2487
2488
2489
2490
2491void kmem_cache_destroy(struct kmem_cache *s)
2492{
2493 if (s->flags & SLAB_DESTROY_BY_RCU)
2494 rcu_barrier();
2495 down_write(&slub_lock);
2496 s->refcount--;
2497 if (!s->refcount) {
2498 list_del(&s->list);
2499 up_write(&slub_lock);
2500 if (kmem_cache_close(s)) {
2501 printk(KERN_ERR "SLUB %s: %s called for cache that "
2502 "still has objects.\n", s->name, __func__);
2503 dump_stack();
2504 }
2505 sysfs_slab_remove(s);
2506 } else
2507 up_write(&slub_lock);
2508}
2509EXPORT_SYMBOL(kmem_cache_destroy);
2510
2511
2512
2513
2514
2515struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
2516EXPORT_SYMBOL(kmalloc_caches);
2517
2518static int __init setup_slub_min_order(char *str)
2519{
2520 get_option(&str, &slub_min_order);
2521
2522 return 1;
2523}
2524
2525__setup("slub_min_order=", setup_slub_min_order);
2526
2527static int __init setup_slub_max_order(char *str)
2528{
2529 get_option(&str, &slub_max_order);
2530 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
2531
2532 return 1;
2533}
2534
2535__setup("slub_max_order=", setup_slub_max_order);
2536
2537static int __init setup_slub_min_objects(char *str)
2538{
2539 get_option(&str, &slub_min_objects);
2540
2541 return 1;
2542}
2543
2544__setup("slub_min_objects=", setup_slub_min_objects);
2545
2546static int __init setup_slub_nomerge(char *str)
2547{
2548 slub_nomerge = 1;
2549 return 1;
2550}
2551
2552__setup("slub_nomerge", setup_slub_nomerge);
2553
2554static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
2555 const char *name, int size, gfp_t gfp_flags)
2556{
2557 unsigned int flags = 0;
2558
2559 if (gfp_flags & SLUB_DMA)
2560 flags = SLAB_CACHE_DMA;
2561
2562 down_write(&slub_lock);
2563 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
2564 flags, NULL))
2565 goto panic;
2566
2567 list_add(&s->list, &slab_caches);
2568 up_write(&slub_lock);
2569 if (sysfs_slab_add(s))
2570 goto panic;
2571 return s;
2572
2573panic:
2574 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
2575}
2576
2577#ifdef CONFIG_ZONE_DMA
2578static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
2579
2580static void sysfs_add_func(struct work_struct *w)
2581{
2582 struct kmem_cache *s;
2583
2584 down_write(&slub_lock);
2585 list_for_each_entry(s, &slab_caches, list) {
2586 if (s->flags & __SYSFS_ADD_DEFERRED) {
2587 s->flags &= ~__SYSFS_ADD_DEFERRED;
2588 sysfs_slab_add(s);
2589 }
2590 }
2591 up_write(&slub_lock);
2592}
2593
2594static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
2595
2596static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
2597{
2598 struct kmem_cache *s;
2599 char *text;
2600 size_t realsize;
2601
2602 s = kmalloc_caches_dma[index];
2603 if (s)
2604 return s;
2605
2606
2607 if (flags & __GFP_WAIT)
2608 down_write(&slub_lock);
2609 else {
2610 if (!down_write_trylock(&slub_lock))
2611 goto out;
2612 }
2613
2614 if (kmalloc_caches_dma[index])
2615 goto unlock_out;
2616
2617 realsize = kmalloc_caches[index].objsize;
2618 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
2619 (unsigned int)realsize);
2620 s = kmalloc(kmem_size, flags & ~SLUB_DMA);
2621
2622 if (!s || !text || !kmem_cache_open(s, flags, text,
2623 realsize, ARCH_KMALLOC_MINALIGN,
2624 SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
2625 kfree(s);
2626 kfree(text);
2627 goto unlock_out;
2628 }
2629
2630 list_add(&s->list, &slab_caches);
2631 kmalloc_caches_dma[index] = s;
2632
2633 schedule_work(&sysfs_add_work);
2634
2635unlock_out:
2636 up_write(&slub_lock);
2637out:
2638 return kmalloc_caches_dma[index];
2639}
2640#endif
2641
2642
2643
2644
2645
2646
2647
2648static s8 size_index[24] = {
2649 3,
2650 4,
2651 5,
2652 5,
2653 6,
2654 6,
2655 6,
2656 6,
2657 1,
2658 1,
2659 1,
2660 1,
2661 7,
2662 7,
2663 7,
2664 7,
2665 2,
2666 2,
2667 2,
2668 2,
2669 2,
2670 2,
2671 2,
2672 2
2673};
2674
2675static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2676{
2677 int index;
2678
2679 if (size <= 192) {
2680 if (!size)
2681 return ZERO_SIZE_PTR;
2682
2683 index = size_index[(size - 1) / 8];
2684 } else
2685 index = fls(size - 1);
2686
2687#ifdef CONFIG_ZONE_DMA
2688 if (unlikely((flags & SLUB_DMA)))
2689 return dma_kmalloc_cache(index, flags);
2690
2691#endif
2692 return &kmalloc_caches[index];
2693}
2694
2695void *__kmalloc(size_t size, gfp_t flags)
2696{
2697 struct kmem_cache *s;
2698 void *ret;
2699
2700 if (unlikely(size > SLUB_MAX_SIZE))
2701 return kmalloc_large(size, flags);
2702
2703 s = get_slab(size, flags);
2704
2705 if (unlikely(ZERO_OR_NULL_PTR(s)))
2706 return s;
2707
2708 ret = slab_alloc(s, flags, -1, _RET_IP_);
2709
2710 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
2711
2712 return ret;
2713}
2714EXPORT_SYMBOL(__kmalloc);
2715
2716static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2717{
2718 struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
2719 get_order(size));
2720
2721 if (page)
2722 return page_address(page);
2723 else
2724 return NULL;
2725}
2726
2727#ifdef CONFIG_NUMA
2728void *__kmalloc_node(size_t size, gfp_t flags, int node)
2729{
2730 struct kmem_cache *s;
2731 void *ret;
2732
2733 if (unlikely(size > SLUB_MAX_SIZE)) {
2734 ret = kmalloc_large_node(size, flags, node);
2735
2736 trace_kmalloc_node(_RET_IP_, ret,
2737 size, PAGE_SIZE << get_order(size),
2738 flags, node);
2739
2740 return ret;
2741 }
2742
2743 s = get_slab(size, flags);
2744
2745 if (unlikely(ZERO_OR_NULL_PTR(s)))
2746 return s;
2747
2748 ret = slab_alloc(s, flags, node, _RET_IP_);
2749
2750 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
2751
2752 return ret;
2753}
2754EXPORT_SYMBOL(__kmalloc_node);
2755#endif
2756
2757size_t ksize(const void *object)
2758{
2759 struct page *page;
2760 struct kmem_cache *s;
2761
2762 if (unlikely(object == ZERO_SIZE_PTR))
2763 return 0;
2764
2765 page = virt_to_head_page(object);
2766
2767 if (unlikely(!PageSlab(page))) {
2768 WARN_ON(!PageCompound(page));
2769 return PAGE_SIZE << compound_order(page);
2770 }
2771 s = page->slab;
2772
2773#ifdef CONFIG_SLUB_DEBUG
2774
2775
2776
2777
2778 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2779 return s->objsize;
2780
2781#endif
2782
2783
2784
2785
2786
2787 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2788 return s->inuse;
2789
2790
2791
2792 return s->size;
2793}
2794EXPORT_SYMBOL(ksize);
2795
2796void kfree(const void *x)
2797{
2798 struct page *page;
2799 void *object = (void *)x;
2800
2801 trace_kfree(_RET_IP_, x);
2802
2803 if (unlikely(ZERO_OR_NULL_PTR(x)))
2804 return;
2805
2806 page = virt_to_head_page(x);
2807 if (unlikely(!PageSlab(page))) {
2808 BUG_ON(!PageCompound(page));
2809 put_page(page);
2810 return;
2811 }
2812 slab_free(page->slab, page, object, _RET_IP_);
2813}
2814EXPORT_SYMBOL(kfree);
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826int kmem_cache_shrink(struct kmem_cache *s)
2827{
2828 int node;
2829 int i;
2830 struct kmem_cache_node *n;
2831 struct page *page;
2832 struct page *t;
2833 int objects = oo_objects(s->max);
2834 struct list_head *slabs_by_inuse =
2835 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
2836 unsigned long flags;
2837
2838 if (!slabs_by_inuse)
2839 return -ENOMEM;
2840
2841 flush_all(s);
2842 for_each_node_state(node, N_NORMAL_MEMORY) {
2843 n = get_node(s, node);
2844
2845 if (!n->nr_partial)
2846 continue;
2847
2848 for (i = 0; i < objects; i++)
2849 INIT_LIST_HEAD(slabs_by_inuse + i);
2850
2851 spin_lock_irqsave(&n->list_lock, flags);
2852
2853
2854
2855
2856
2857
2858
2859 list_for_each_entry_safe(page, t, &n->partial, lru) {
2860 if (!page->inuse && slab_trylock(page)) {
2861
2862
2863
2864
2865
2866 list_del(&page->lru);
2867 n->nr_partial--;
2868 slab_unlock(page);
2869 discard_slab(s, page);
2870 } else {
2871 list_move(&page->lru,
2872 slabs_by_inuse + page->inuse);
2873 }
2874 }
2875
2876
2877
2878
2879
2880 for (i = objects - 1; i >= 0; i--)
2881 list_splice(slabs_by_inuse + i, n->partial.prev);
2882
2883 spin_unlock_irqrestore(&n->list_lock, flags);
2884 }
2885
2886 kfree(slabs_by_inuse);
2887 return 0;
2888}
2889EXPORT_SYMBOL(kmem_cache_shrink);
2890
2891#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
2892static int slab_mem_going_offline_callback(void *arg)
2893{
2894 struct kmem_cache *s;
2895
2896 down_read(&slub_lock);
2897 list_for_each_entry(s, &slab_caches, list)
2898 kmem_cache_shrink(s);
2899 up_read(&slub_lock);
2900
2901 return 0;
2902}
2903
2904static void slab_mem_offline_callback(void *arg)
2905{
2906 struct kmem_cache_node *n;
2907 struct kmem_cache *s;
2908 struct memory_notify *marg = arg;
2909 int offline_node;
2910
2911 offline_node = marg->status_change_nid;
2912
2913
2914
2915
2916
2917 if (offline_node < 0)
2918 return;
2919
2920 down_read(&slub_lock);
2921 list_for_each_entry(s, &slab_caches, list) {
2922 n = get_node(s, offline_node);
2923 if (n) {
2924
2925
2926
2927
2928
2929
2930 BUG_ON(slabs_node(s, offline_node));
2931
2932 s->node[offline_node] = NULL;
2933 kmem_cache_free(kmalloc_caches, n);
2934 }
2935 }
2936 up_read(&slub_lock);
2937}
2938
2939static int slab_mem_going_online_callback(void *arg)
2940{
2941 struct kmem_cache_node *n;
2942 struct kmem_cache *s;
2943 struct memory_notify *marg = arg;
2944 int nid = marg->status_change_nid;
2945 int ret = 0;
2946
2947
2948
2949
2950
2951 if (nid < 0)
2952 return 0;
2953
2954
2955
2956
2957
2958
2959 down_read(&slub_lock);
2960 list_for_each_entry(s, &slab_caches, list) {
2961
2962
2963
2964
2965
2966 n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL);
2967 if (!n) {
2968 ret = -ENOMEM;
2969 goto out;
2970 }
2971 init_kmem_cache_node(n, s);
2972 s->node[nid] = n;
2973 }
2974out:
2975 up_read(&slub_lock);
2976 return ret;
2977}
2978
2979static int slab_memory_callback(struct notifier_block *self,
2980 unsigned long action, void *arg)
2981{
2982 int ret = 0;
2983
2984 switch (action) {
2985 case MEM_GOING_ONLINE:
2986 ret = slab_mem_going_online_callback(arg);
2987 break;
2988 case MEM_GOING_OFFLINE:
2989 ret = slab_mem_going_offline_callback(arg);
2990 break;
2991 case MEM_OFFLINE:
2992 case MEM_CANCEL_ONLINE:
2993 slab_mem_offline_callback(arg);
2994 break;
2995 case MEM_ONLINE:
2996 case MEM_CANCEL_OFFLINE:
2997 break;
2998 }
2999 if (ret)
3000 ret = notifier_from_errno(ret);
3001 else
3002 ret = NOTIFY_OK;
3003 return ret;
3004}
3005
3006#endif
3007
3008
3009
3010
3011
3012void __init kmem_cache_init(void)
3013{
3014 int i;
3015 int caches = 0;
3016
3017 init_alloc_cpu();
3018
3019#ifdef CONFIG_NUMA
3020
3021
3022
3023
3024
3025 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
3026 sizeof(struct kmem_cache_node), GFP_KERNEL);
3027 kmalloc_caches[0].refcount = -1;
3028 caches++;
3029
3030 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3031#endif
3032
3033
3034 slab_state = PARTIAL;
3035
3036
3037 if (KMALLOC_MIN_SIZE <= 64) {
3038 create_kmalloc_cache(&kmalloc_caches[1],
3039 "kmalloc-96", 96, GFP_KERNEL);
3040 caches++;
3041 create_kmalloc_cache(&kmalloc_caches[2],
3042 "kmalloc-192", 192, GFP_KERNEL);
3043 caches++;
3044 }
3045
3046 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3047 create_kmalloc_cache(&kmalloc_caches[i],
3048 "kmalloc", 1 << i, GFP_KERNEL);
3049 caches++;
3050 }
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3065 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3066
3067 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
3068 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
3069
3070 if (KMALLOC_MIN_SIZE == 128) {
3071
3072
3073
3074
3075
3076 for (i = 128 + 8; i <= 192; i += 8)
3077 size_index[(i - 1) / 8] = 8;
3078 }
3079
3080 slab_state = UP;
3081
3082
3083 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
3084 kmalloc_caches[i]. name =
3085 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
3086
3087#ifdef CONFIG_SMP
3088 register_cpu_notifier(&slab_notifier);
3089 kmem_size = offsetof(struct kmem_cache, cpu_slab) +
3090 nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
3091#else
3092 kmem_size = sizeof(struct kmem_cache);
3093#endif
3094
3095 printk(KERN_INFO
3096 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3097 " CPUs=%d, Nodes=%d\n",
3098 caches, cache_line_size(),
3099 slub_min_order, slub_max_order, slub_min_objects,
3100 nr_cpu_ids, nr_node_ids);
3101}
3102
3103
3104
3105
3106static int slab_unmergeable(struct kmem_cache *s)
3107{
3108 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3109 return 1;
3110
3111 if (s->ctor)
3112 return 1;
3113
3114
3115
3116
3117 if (s->refcount < 0)
3118 return 1;
3119
3120 return 0;
3121}
3122
3123static struct kmem_cache *find_mergeable(size_t size,
3124 size_t align, unsigned long flags, const char *name,
3125 void (*ctor)(void *))
3126{
3127 struct kmem_cache *s;
3128
3129 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3130 return NULL;
3131
3132 if (ctor)
3133 return NULL;
3134
3135 size = ALIGN(size, sizeof(void *));
3136 align = calculate_alignment(flags, align, size);
3137 size = ALIGN(size, align);
3138 flags = kmem_cache_flags(size, flags, name, NULL);
3139
3140 list_for_each_entry(s, &slab_caches, list) {
3141 if (slab_unmergeable(s))
3142 continue;
3143
3144 if (size > s->size)
3145 continue;
3146
3147 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3148 continue;
3149
3150
3151
3152
3153 if ((s->size & ~(align - 1)) != s->size)
3154 continue;
3155
3156 if (s->size - size >= sizeof(void *))
3157 continue;
3158
3159 return s;
3160 }
3161 return NULL;
3162}
3163
/*
 * Create a slab cache, or hand back an existing cache the request can be
 * merged into.
 *
 * @name:  name of the cache (used for sysfs and error reporting).
 * @size:  object size requested by the caller.
 * @align: requested alignment.
 * @flags: SLAB_* flags; SLAB_PANIC makes a failure panic instead of
 *         returning NULL.
 * @ctor:  optional object constructor.
 *
 * Returns the cache on success, NULL on failure (unless SLAB_PANIC is set).
 * slub_lock is held for writing while the cache list / refcount is touched
 * and is dropped before the sysfs helpers are called.
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
		size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s;

	down_write(&slub_lock);
	s = find_mergeable(size, align, flags, name, ctor);
	if (s) {
		int cpu;

		s->refcount++;
		/*
		 * Grow the recorded object size to the larger of the existing
		 * and the newly requested size.
		 */
		s->objsize = max(s->objsize, (int)size);

		/*
		 * Propagate the (possibly changed) object size to the per cpu
		 * structures of all online cpus.
		 */
		for_each_online_cpu(cpu)
			get_cpu_slab(s, cpu)->objsize = s->objsize;

		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
		up_write(&slub_lock);

		if (sysfs_slab_alias(s, name)) {
			/* Alias could not be registered: undo the reference. */
			down_write(&slub_lock);
			s->refcount--;
			up_write(&slub_lock);
			goto err;
		}
		return s;
	}

	/* Nothing to merge with: build a brand new cache. */
	s = kmalloc(kmem_size, GFP_KERNEL);
	if (s) {
		if (kmem_cache_open(s, GFP_KERNEL, name,
				size, align, flags, ctor)) {
			list_add(&s->list, &slab_caches);
			up_write(&slub_lock);
			if (sysfs_slab_add(s)) {
				/* sysfs registration failed: unlink and free. */
				down_write(&slub_lock);
				list_del(&s->list);
				up_write(&slub_lock);
				kfree(s);
				goto err;
			}
			return s;
		}
		kfree(s);
	}
	up_write(&slub_lock);

err:
	if (flags & SLAB_PANIC)
		panic("Cannot create slabcache %s\n", name);
	else
		s = NULL;
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
3227
3228#ifdef CONFIG_SMP
3229
3230
3231
3232
/*
 * CPU hotplug notifier callback.
 *
 * On CPU_UP_PREPARE(_FROZEN) a kmem_cache_cpu structure is allocated for
 * the new cpu in every cache on slab_caches. On CPU_UP_CANCELED(_FROZEN)
 * and CPU_DEAD(_FROZEN) the cpu's slab is flushed and its kmem_cache_cpu
 * structure is released for every cache. Always returns NOTIFY_OK.
 */
static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	struct kmem_cache *s;
	unsigned long flags;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		init_alloc_cpu_cpu(cpu);
		/* slub_lock (read) protects the walk over slab_caches. */
		down_read(&slub_lock);
		list_for_each_entry(s, &slab_caches, list)
			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
							GFP_KERNEL);
		up_read(&slub_lock);
		break;

	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		down_read(&slub_lock);
		list_for_each_entry(s, &slab_caches, list) {
			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);

			/* The flush runs with local interrupts disabled. */
			local_irq_save(flags);
			__flush_cpu_slab(s, cpu);
			local_irq_restore(flags);
			free_kmem_cache_cpu(c, cpu);
			s->cpu_slab[cpu] = NULL;
		}
		up_read(&slub_lock);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
3272
/* Notifier block that routes cpu notifier events to slab_cpuup_callback(). */
static struct notifier_block __cpuinitdata slab_notifier = {
	.notifier_call = slab_cpuup_callback
};
3276
3277#endif
3278
3279void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3280{
3281 struct kmem_cache *s;
3282 void *ret;
3283
3284 if (unlikely(size > SLUB_MAX_SIZE))
3285 return kmalloc_large(size, gfpflags);
3286
3287 s = get_slab(size, gfpflags);
3288
3289 if (unlikely(ZERO_OR_NULL_PTR(s)))
3290 return s;
3291
3292 ret = slab_alloc(s, gfpflags, -1, caller);
3293
3294
3295 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3296
3297 return ret;
3298}
3299
3300void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3301 int node, unsigned long caller)
3302{
3303 struct kmem_cache *s;
3304 void *ret;
3305
3306 if (unlikely(size > SLUB_MAX_SIZE))
3307 return kmalloc_large_node(size, gfpflags, node);
3308
3309 s = get_slab(size, gfpflags);
3310
3311 if (unlikely(ZERO_OR_NULL_PTR(s)))
3312 return s;
3313
3314 ret = slab_alloc(s, gfpflags, node, caller);
3315
3316
3317 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3318
3319 return ret;
3320}
3321
3322#ifdef CONFIG_SLUB_DEBUG
3323static unsigned long count_partial(struct kmem_cache_node *n,
3324 int (*get_count)(struct page *))
3325{
3326 unsigned long flags;
3327 unsigned long x = 0;
3328 struct page *page;
3329
3330 spin_lock_irqsave(&n->list_lock, flags);
3331 list_for_each_entry(page, &n->partial, lru)
3332 x += get_count(page);
3333 spin_unlock_irqrestore(&n->list_lock, flags);
3334 return x;
3335}
3336
3337static int count_inuse(struct page *page)
3338{
3339 return page->inuse;
3340}
3341
3342static int count_total(struct page *page)
3343{
3344 return page->objects;
3345}
3346
3347static int count_free(struct page *page)
3348{
3349 return page->objects - page->inuse;
3350}
3351
3352static int validate_slab(struct kmem_cache *s, struct page *page,
3353 unsigned long *map)
3354{
3355 void *p;
3356 void *addr = page_address(page);
3357
3358 if (!check_slab(s, page) ||
3359 !on_freelist(s, page, NULL))
3360 return 0;
3361
3362
3363 bitmap_zero(map, page->objects);
3364
3365 for_each_free_object(p, s, page->freelist) {
3366 set_bit(slab_index(p, s, addr), map);
3367 if (!check_object(s, page, p, 0))
3368 return 0;
3369 }
3370
3371 for_each_object(p, s, addr, page->objects)
3372 if (!test_bit(slab_index(p, s, addr), map))
3373 if (!check_object(s, page, p, 1))
3374 return 0;
3375 return 1;
3376}
3377
3378static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3379 unsigned long *map)
3380{
3381 if (slab_trylock(page)) {
3382 validate_slab(s, page, map);
3383 slab_unlock(page);
3384 } else
3385 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3386 s->name, page);
3387
3388 if (s->flags & DEBUG_DEFAULT_FLAGS) {
3389 if (!PageSlubDebug(page))
3390 printk(KERN_ERR "SLUB %s: SlubDebug not set "
3391 "on slab 0x%p\n", s->name, page);
3392 } else {
3393 if (PageSlubDebug(page))
3394 printk(KERN_ERR "SLUB %s: SlubDebug set on "
3395 "slab 0x%p\n", s->name, page);
3396 }
3397}
3398
3399static int validate_slab_node(struct kmem_cache *s,
3400 struct kmem_cache_node *n, unsigned long *map)
3401{
3402 unsigned long count = 0;
3403 struct page *page;
3404 unsigned long flags;
3405
3406 spin_lock_irqsave(&n->list_lock, flags);
3407
3408 list_for_each_entry(page, &n->partial, lru) {
3409 validate_slab_slab(s, page, map);
3410 count++;
3411 }
3412 if (count != n->nr_partial)
3413 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
3414 "counter=%ld\n", s->name, count, n->nr_partial);
3415
3416 if (!(s->flags & SLAB_STORE_USER))
3417 goto out;
3418
3419 list_for_each_entry(page, &n->full, lru) {
3420 validate_slab_slab(s, page, map);
3421 count++;
3422 }
3423 if (count != atomic_long_read(&n->nr_slabs))
3424 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
3425 "counter=%ld\n", s->name, count,
3426 atomic_long_read(&n->nr_slabs));
3427
3428out:
3429 spin_unlock_irqrestore(&n->list_lock, flags);
3430 return count;
3431}
3432
3433static long validate_slab_cache(struct kmem_cache *s)
3434{
3435 int node;
3436 unsigned long count = 0;
3437 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3438 sizeof(unsigned long), GFP_KERNEL);
3439
3440 if (!map)
3441 return -ENOMEM;
3442
3443 flush_all(s);
3444 for_each_node_state(node, N_NORMAL_MEMORY) {
3445 struct kmem_cache_node *n = get_node(s, node);
3446
3447 count += validate_slab_node(s, n, map);
3448 }
3449 kfree(map);
3450 return count;
3451}
3452
#ifdef SLUB_RESILIENCY_TEST
/*
 * Self test that corrupts kmalloc objects on purpose and then runs
 * validate_slab_cache() on the affected kmalloc cache. Only compiled when
 * SLUB_RESILIENCY_TEST is defined. The allocations are intentionally never
 * cleaned up and the out-of-bounds / use-after-free writes are the point
 * of the test, so the statement order must not be changed.
 */
static void resiliency_test(void)
{
	u8 *p;

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	/* Write one byte past the end of a 16 byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches + 4);

	/* Write beyond the object plus one pointer of a 32 byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches + 5);
	/* Write at a pseudo random pointer-aligned offset past the object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches + 6);

	printk(KERN_ERR "\nB. Corruption after free\n");
	/* Write to the first byte of a freed object. */
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 7);

	/* Write into the middle of a freed object. */
	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
	validate_slab_cache(kmalloc_caches + 8);

	/* Write one byte past the end of a freed object. */
	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 9);
}
#else
/* Resiliency test disabled: empty stub so callers need no #ifdef. */
static void resiliency_test(void) {};
#endif
3510
3511
3512
3513
3514
3515
/*
 * One entry of the call site table built by add_location(): aggregated
 * information about all tracked objects that share the same track address.
 */
struct location {
	unsigned long count;	/* number of tracks merged into this entry */
	unsigned long addr;	/* track->addr shared by those tracks */
	long long sum_time;	/* sum of ages (jiffies - track->when) */
	long min_time;		/* smallest age seen */
	long max_time;		/* largest age seen */
	long min_pid;		/* smallest track->pid seen */
	long max_pid;		/* largest track->pid seen */
	DECLARE_BITMAP(cpus, NR_CPUS);	/* cpus recorded in the tracks */
	nodemask_t nodes;	/* nodes of the pages holding the tracks */
};
3527
/* Growable array of struct location, kept sorted by addr by add_location(). */
struct loc_track {
	unsigned long max;	/* number of entries the loc array can hold */
	unsigned long count;	/* number of entries currently in use */
	struct location *loc;	/* page-allocated entry array */
};
3533
3534static void free_loc_track(struct loc_track *t)
3535{
3536 if (t->max)
3537 free_pages((unsigned long)t->loc,
3538 get_order(sizeof(struct location) * t->max));
3539}
3540
3541static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
3542{
3543 struct location *l;
3544 int order;
3545
3546 order = get_order(sizeof(struct location) * max);
3547
3548 l = (void *)__get_free_pages(flags, order);
3549 if (!l)
3550 return 0;
3551
3552 if (t->count) {
3553 memcpy(l, t->loc, sizeof(struct location) * t->count);
3554 free_loc_track(t);
3555 }
3556 t->max = max;
3557 t->loc = l;
3558 return 1;
3559}
3560
/*
 * Account one track record in the location table.
 *
 * The table is kept sorted by addr. A binary search looks for an entry
 * with track->addr; if found its statistics are updated, otherwise a new
 * entry is inserted at the position where the search ended.
 *
 * Returns 1 on success, 0 if the table is full and could not be grown.
 */
static int add_location(struct loc_track *t, struct kmem_cache *s,
				const struct track *track)
{
	long start, end, pos;
	struct location *l;
	unsigned long caddr;
	unsigned long age = jiffies - track->when;

	/* Search the open interval (start, end). */
	start = -1;
	end = t->count;

	for ( ; ; ) {
		pos = start + (end - start + 1) / 2;

		/*
		 * There is no entry at "end". Arriving there means the
		 * address is not in the table and the new entry belongs
		 * in front of "end".
		 */
		if (pos == end)
			break;

		caddr = t->loc[pos].addr;
		if (track->addr == caddr) {

			/* Known call site: fold this track into the entry. */
			l = &t->loc[pos];
			l->count++;
			if (track->when) {
				l->sum_time += age;
				if (age < l->min_time)
					l->min_time = age;
				if (age > l->max_time)
					l->max_time = age;

				if (track->pid < l->min_pid)
					l->min_pid = track->pid;
				if (track->pid > l->max_pid)
					l->max_pid = track->pid;

				cpumask_set_cpu(track->cpu,
						to_cpumask(l->cpus));
			}
			node_set(page_to_nid(virt_to_page(track)), l->nodes);
			return 1;
		}

		if (track->addr < caddr)
			end = pos;
		else
			start = pos;
	}

	/*
	 * Not found: insert a new entry, doubling the table first when it
	 * is full.
	 */
	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
		return 0;

	l = t->loc + pos;
	/* Shift the tail up by one slot to make room at pos. */
	if (pos < t->count)
		memmove(l + 1, l,
			(t->count - pos) * sizeof(struct location));
	t->count++;
	l->count = 1;
	l->addr = track->addr;
	l->sum_time = age;
	l->min_time = age;
	l->max_time = age;
	l->min_pid = track->pid;
	l->max_pid = track->pid;
	cpumask_clear(to_cpumask(l->cpus));
	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
	nodes_clear(l->nodes);
	node_set(page_to_nid(virt_to_page(track)), l->nodes);
	return 1;
}
3636
3637static void process_slab(struct loc_track *t, struct kmem_cache *s,
3638 struct page *page, enum track_item alloc)
3639{
3640 void *addr = page_address(page);
3641 DECLARE_BITMAP(map, page->objects);
3642 void *p;
3643
3644 bitmap_zero(map, page->objects);
3645 for_each_free_object(p, s, page->freelist)
3646 set_bit(slab_index(p, s, addr), map);
3647
3648 for_each_object(p, s, addr, page->objects)
3649 if (!test_bit(slab_index(p, s, addr), map))
3650 add_location(t, s, get_track(s, p, alloc));
3651}
3652
/*
 * Format the call site statistics of a cache into @buf.
 *
 * @alloc selects which track record (TRACK_ALLOC / TRACK_FREE) of each
 * object is evaluated. Returns the number of bytes written to @buf.
 * Output stops early once fewer than KSYM_SYMBOL_LEN + 100 bytes of a
 * PAGE_SIZE buffer remain.
 */
static int list_locations(struct kmem_cache *s, char *buf,
					enum track_item alloc)
{
	int len = 0;
	unsigned long i;
	struct loc_track t = { 0, 0, NULL };
	int node;

	/* Start with as many entries as fit into a single page. */
	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
			GFP_TEMPORARY))
		return sprintf(buf, "Out of memory\n");

	/* Flush the cpu slabs before walking the node lists. */
	flush_all(s);

	for_each_node_state(node, N_NORMAL_MEMORY) {
		struct kmem_cache_node *n = get_node(s, node);
		unsigned long flags;
		struct page *page;

		if (!atomic_long_read(&n->nr_slabs))
			continue;

		/* Collect from both the partial and the full list. */
		spin_lock_irqsave(&n->list_lock, flags);
		list_for_each_entry(page, &n->partial, lru)
			process_slab(&t, s, page, alloc);
		list_for_each_entry(page, &n->full, lru)
			process_slab(&t, s, page, alloc);
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	/* One output line per call site. */
	for (i = 0; i < t.count; i++) {
		struct location *l = &t.loc[i];

		/* Keep enough room for a full symbol plus the other fields. */
		if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
			break;
		len += sprintf(buf + len, "%7ld ", l->count);

		if (l->addr)
			len += sprint_symbol(buf + len, (unsigned long)l->addr);
		else
			len += sprintf(buf + len, "<not-available>");

		/* Print min/avg/max unless the sum equals the minimum. */
		if (l->sum_time != l->min_time) {
			len += sprintf(buf + len, " age=%ld/%ld/%ld",
				l->min_time,
				(long)div_u64(l->sum_time, l->count),
				l->max_time);
		} else
			len += sprintf(buf + len, " age=%ld",
				l->min_time);

		if (l->min_pid != l->max_pid)
			len += sprintf(buf + len, " pid=%ld-%ld",
				l->min_pid, l->max_pid);
		else
			len += sprintf(buf + len, " pid=%ld",
				l->min_pid);

		/* The cpu list is only shown with more than one cpu online. */
		if (num_online_cpus() > 1 &&
				!cpumask_empty(to_cpumask(l->cpus)) &&
				len < PAGE_SIZE - 60) {
			len += sprintf(buf + len, " cpus=");
			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
						 to_cpumask(l->cpus));
		}

		/* The node list is only shown with more than one node online. */
		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
				len < PAGE_SIZE - 60) {
			len += sprintf(buf + len, " nodes=");
			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
					l->nodes);
		}

		len += sprintf(buf + len, "\n");
	}

	/* Only the pages are released; t.count stays valid for the test below. */
	free_loc_track(&t);
	if (!t.count)
		len += sprintf(buf, "No data\n");
	return len;
}
3735
/* Bit numbers of the selectors understood by show_slab_objects(). */
enum slab_stat_type {
	SL_ALL,			/* count over all slabs of each node */
	SL_PARTIAL,		/* count over the partial lists only */
	SL_CPU,			/* count the per cpu slabs */
	SL_OBJECTS,		/* count objects in use instead of slabs */
	SL_TOTAL		/* count total objects instead of slabs */
};

/* Flag values derived from the bit numbers above. */
#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
3749
/*
 * Format slab or object counts of a cache into @buf.
 *
 * @flags is a combination of SO_* values: SO_CPU / SO_ALL / SO_PARTIAL
 * select which slabs are looked at, SO_TOTAL / SO_OBJECTS switch from
 * counting slabs to counting total or in-use objects.
 *
 * The output is the grand total, followed on CONFIG_NUMA by " N<node>=<n>"
 * for every node with a non-zero count, and a newline. Returns the number
 * of bytes written or -ENOMEM.
 */
static ssize_t show_slab_objects(struct kmem_cache *s,
			    char *buf, unsigned long flags)
{
	unsigned long total = 0;
	int node;
	int x;
	unsigned long *nodes;
	unsigned long *per_cpu;

	/* One allocation holds both per node arrays back to back. */
	nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;
	per_cpu = nodes + nr_node_ids;

	if (flags & SO_CPU) {
		int cpu;

		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);

			if (!c || c->node < 0)
				continue;

			if (c->page) {
				if (flags & SO_TOTAL)
					x = c->page->objects;
				else if (flags & SO_OBJECTS)
					x = c->page->inuse;
				else
					x = 1;

				total += x;
				nodes[c->node] += x;
			}
			/* NOTE(review): per_cpu is updated but never read here. */
			per_cpu[c->node]++;
		}
	}

	if (flags & SO_ALL) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n = get_node(s, node);

			if (flags & SO_TOTAL)
				x = atomic_long_read(&n->total_objects);
			else if (flags & SO_OBJECTS)
				/* In use = total minus free on partial slabs. */
				x = atomic_long_read(&n->total_objects) -
					count_partial(n, count_free);

			else
				x = atomic_long_read(&n->nr_slabs);
			total += x;
			nodes[node] += x;
		}

	} else if (flags & SO_PARTIAL) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n = get_node(s, node);

			if (flags & SO_TOTAL)
				x = count_partial(n, count_total);
			else if (flags & SO_OBJECTS)
				x = count_partial(n, count_inuse);
			else
				x = n->nr_partial;
			total += x;
			nodes[node] += x;
		}
	}
	/* From here on x is the output length. */
	x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
	for_each_node_state(node, N_NORMAL_MEMORY)
		if (nodes[node])
			x += sprintf(buf + x, " N%d=%lu",
					node, nodes[node]);
#endif
	kfree(nodes);
	return x + sprintf(buf + x, "\n");
}
3828
3829static int any_slab_objects(struct kmem_cache *s)
3830{
3831 int node;
3832
3833 for_each_online_node(node) {
3834 struct kmem_cache_node *n = get_node(s, node);
3835
3836 if (!n)
3837 continue;
3838
3839 if (atomic_long_read(&n->total_objects))
3840 return 1;
3841 }
3842 return 0;
3843}
3844
/*
 * Map the generic sysfs objects back to the slab specific containers:
 * to_slab_attr() takes a struct attribute pointer, to_slab() a struct
 * kobject pointer embedded in a struct kmem_cache.
 *
 * Neither macro ends in a semicolon so both can be used as ordinary
 * expressions (e.g. as a function argument).
 */
#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
3847
/*
 * A sysfs attribute of a slab cache. The show/store callbacks receive the
 * cache itself instead of the kobject; either may be NULL, in which case
 * slab_attr_show()/slab_attr_store() return -EIO.
 */
struct slab_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kmem_cache *s, char *buf);
	ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};
3853
/* Define <name>_attr as a read-only attribute (built with __ATTR_RO). */
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = __ATTR_RO(_name)

/*
 * Define <name>_attr as a mode 0644 attribute backed by <name>_show()
 * and <name>_store().
 */
#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr =  \
	__ATTR(_name, 0644, _name##_show, _name##_store)
3860
3861static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
3862{
3863 return sprintf(buf, "%d\n", s->size);
3864}
3865SLAB_ATTR_RO(slab_size);
3866
3867static ssize_t align_show(struct kmem_cache *s, char *buf)
3868{
3869 return sprintf(buf, "%d\n", s->align);
3870}
3871SLAB_ATTR_RO(align);
3872
3873static ssize_t object_size_show(struct kmem_cache *s, char *buf)
3874{
3875 return sprintf(buf, "%d\n", s->objsize);
3876}
3877SLAB_ATTR_RO(object_size);
3878
3879static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
3880{
3881 return sprintf(buf, "%d\n", oo_objects(s->oo));
3882}
3883SLAB_ATTR_RO(objs_per_slab);
3884
3885static ssize_t order_store(struct kmem_cache *s,
3886 const char *buf, size_t length)
3887{
3888 unsigned long order;
3889 int err;
3890
3891 err = strict_strtoul(buf, 10, &order);
3892 if (err)
3893 return err;
3894
3895 if (order > slub_max_order || order < slub_min_order)
3896 return -EINVAL;
3897
3898 calculate_sizes(s, order);
3899 return length;
3900}
3901
3902static ssize_t order_show(struct kmem_cache *s, char *buf)
3903{
3904 return sprintf(buf, "%d\n", oo_order(s->oo));
3905}
3906SLAB_ATTR(order);
3907
3908static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
3909{
3910 return sprintf(buf, "%lu\n", s->min_partial);
3911}
3912
3913static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
3914 size_t length)
3915{
3916 unsigned long min;
3917 int err;
3918
3919 err = strict_strtoul(buf, 10, &min);
3920 if (err)
3921 return err;
3922
3923 set_min_partial(s, min);
3924 return length;
3925}
3926SLAB_ATTR(min_partial);
3927
3928static ssize_t ctor_show(struct kmem_cache *s, char *buf)
3929{
3930 if (s->ctor) {
3931 int n = sprint_symbol(buf, (unsigned long)s->ctor);
3932
3933 return n + sprintf(buf + n, "\n");
3934 }
3935 return 0;
3936}
3937SLAB_ATTR_RO(ctor);
3938
3939static ssize_t aliases_show(struct kmem_cache *s, char *buf)
3940{
3941 return sprintf(buf, "%d\n", s->refcount - 1);
3942}
3943SLAB_ATTR_RO(aliases);
3944
3945static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3946{
3947 return show_slab_objects(s, buf, SO_ALL);
3948}
3949SLAB_ATTR_RO(slabs);
3950
3951static ssize_t partial_show(struct kmem_cache *s, char *buf)
3952{
3953 return show_slab_objects(s, buf, SO_PARTIAL);
3954}
3955SLAB_ATTR_RO(partial);
3956
3957static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
3958{
3959 return show_slab_objects(s, buf, SO_CPU);
3960}
3961SLAB_ATTR_RO(cpu_slabs);
3962
3963static ssize_t objects_show(struct kmem_cache *s, char *buf)
3964{
3965 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
3966}
3967SLAB_ATTR_RO(objects);
3968
3969static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
3970{
3971 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
3972}
3973SLAB_ATTR_RO(objects_partial);
3974
3975static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
3976{
3977 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
3978}
3979SLAB_ATTR_RO(total_objects);
3980
3981static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
3982{
3983 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
3984}
3985
3986static ssize_t sanity_checks_store(struct kmem_cache *s,
3987 const char *buf, size_t length)
3988{
3989 s->flags &= ~SLAB_DEBUG_FREE;
3990 if (buf[0] == '1')
3991 s->flags |= SLAB_DEBUG_FREE;
3992 return length;
3993}
3994SLAB_ATTR(sanity_checks);
3995
3996static ssize_t trace_show(struct kmem_cache *s, char *buf)
3997{
3998 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
3999}
4000
4001static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4002 size_t length)
4003{
4004 s->flags &= ~SLAB_TRACE;
4005 if (buf[0] == '1')
4006 s->flags |= SLAB_TRACE;
4007 return length;
4008}
4009SLAB_ATTR(trace);
4010
4011static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4012{
4013 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4014}
4015
4016static ssize_t reclaim_account_store(struct kmem_cache *s,
4017 const char *buf, size_t length)
4018{
4019 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4020 if (buf[0] == '1')
4021 s->flags |= SLAB_RECLAIM_ACCOUNT;
4022 return length;
4023}
4024SLAB_ATTR(reclaim_account);
4025
4026static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4027{
4028 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4029}
4030SLAB_ATTR_RO(hwcache_align);
4031
4032#ifdef CONFIG_ZONE_DMA
4033static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4034{
4035 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4036}
4037SLAB_ATTR_RO(cache_dma);
4038#endif
4039
4040static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4041{
4042 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4043}
4044SLAB_ATTR_RO(destroy_by_rcu);
4045
4046static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4047{
4048 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4049}
4050
4051static ssize_t red_zone_store(struct kmem_cache *s,
4052 const char *buf, size_t length)
4053{
4054 if (any_slab_objects(s))
4055 return -EBUSY;
4056
4057 s->flags &= ~SLAB_RED_ZONE;
4058 if (buf[0] == '1')
4059 s->flags |= SLAB_RED_ZONE;
4060 calculate_sizes(s, -1);
4061 return length;
4062}
4063SLAB_ATTR(red_zone);
4064
4065static ssize_t poison_show(struct kmem_cache *s, char *buf)
4066{
4067 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4068}
4069
4070static ssize_t poison_store(struct kmem_cache *s,
4071 const char *buf, size_t length)
4072{
4073 if (any_slab_objects(s))
4074 return -EBUSY;
4075
4076 s->flags &= ~SLAB_POISON;
4077 if (buf[0] == '1')
4078 s->flags |= SLAB_POISON;
4079 calculate_sizes(s, -1);
4080 return length;
4081}
4082SLAB_ATTR(poison);
4083
4084static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4085{
4086 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4087}
4088
4089static ssize_t store_user_store(struct kmem_cache *s,
4090 const char *buf, size_t length)
4091{
4092 if (any_slab_objects(s))
4093 return -EBUSY;
4094
4095 s->flags &= ~SLAB_STORE_USER;
4096 if (buf[0] == '1')
4097 s->flags |= SLAB_STORE_USER;
4098 calculate_sizes(s, -1);
4099 return length;
4100}
4101SLAB_ATTR(store_user);
4102
4103static ssize_t validate_show(struct kmem_cache *s, char *buf)
4104{
4105 return 0;
4106}
4107
4108static ssize_t validate_store(struct kmem_cache *s,
4109 const char *buf, size_t length)
4110{
4111 int ret = -EINVAL;
4112
4113 if (buf[0] == '1') {
4114 ret = validate_slab_cache(s);
4115 if (ret >= 0)
4116 ret = length;
4117 }
4118 return ret;
4119}
4120SLAB_ATTR(validate);
4121
4122static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4123{
4124 return 0;
4125}
4126
4127static ssize_t shrink_store(struct kmem_cache *s,
4128 const char *buf, size_t length)
4129{
4130 if (buf[0] == '1') {
4131 int rc = kmem_cache_shrink(s);
4132
4133 if (rc)
4134 return rc;
4135 } else
4136 return -EINVAL;
4137 return length;
4138}
4139SLAB_ATTR(shrink);
4140
4141static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4142{
4143 if (!(s->flags & SLAB_STORE_USER))
4144 return -ENOSYS;
4145 return list_locations(s, buf, TRACK_ALLOC);
4146}
4147SLAB_ATTR_RO(alloc_calls);
4148
4149static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4150{
4151 if (!(s->flags & SLAB_STORE_USER))
4152 return -ENOSYS;
4153 return list_locations(s, buf, TRACK_FREE);
4154}
4155SLAB_ATTR_RO(free_calls);
4156
4157#ifdef CONFIG_NUMA
4158static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4159{
4160 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4161}
4162
4163static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4164 const char *buf, size_t length)
4165{
4166 unsigned long ratio;
4167 int err;
4168
4169 err = strict_strtoul(buf, 10, &ratio);
4170 if (err)
4171 return err;
4172
4173 if (ratio <= 100)
4174 s->remote_node_defrag_ratio = ratio * 10;
4175
4176 return length;
4177}
4178SLAB_ATTR(remote_node_defrag_ratio);
4179#endif
4180
4181#ifdef CONFIG_SLUB_STATS
4182static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4183{
4184 unsigned long sum = 0;
4185 int cpu;
4186 int len;
4187 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4188
4189 if (!data)
4190 return -ENOMEM;
4191
4192 for_each_online_cpu(cpu) {
4193 unsigned x = get_cpu_slab(s, cpu)->stat[si];
4194
4195 data[cpu] = x;
4196 sum += x;
4197 }
4198
4199 len = sprintf(buf, "%lu", sum);
4200
4201#ifdef CONFIG_SMP
4202 for_each_online_cpu(cpu) {
4203 if (data[cpu] && len < PAGE_SIZE - 20)
4204 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4205 }
4206#endif
4207 kfree(data);
4208 return len + sprintf(buf + len, "\n");
4209}
4210
/*
 * Generate <text>_show(), which prints statistics item <si> through
 * show_stat(), together with the matching read-only attribute <text>_attr.
 */
#define STAT_ATTR(si, text) 					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
SLAB_ATTR_RO(text);						\

/* One read-only attribute per statistics item. */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
4236#endif
4237
/*
 * NULL terminated list of all attributes exposed for a slab cache. The
 * cache_dma, remote_node_defrag_ratio and statistics entries are only
 * present when the respective config option is enabled.
 */
static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&shrink_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&order_fallback_attr.attr,
#endif
	NULL
};
4293
/* Attribute group that sysfs_slab_add() creates for every cache kobject. */
static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};
4297
4298static ssize_t slab_attr_show(struct kobject *kobj,
4299 struct attribute *attr,
4300 char *buf)
4301{
4302 struct slab_attribute *attribute;
4303 struct kmem_cache *s;
4304 int err;
4305
4306 attribute = to_slab_attr(attr);
4307 s = to_slab(kobj);
4308
4309 if (!attribute->show)
4310 return -EIO;
4311
4312 err = attribute->show(s, buf);
4313
4314 return err;
4315}
4316
4317static ssize_t slab_attr_store(struct kobject *kobj,
4318 struct attribute *attr,
4319 const char *buf, size_t len)
4320{
4321 struct slab_attribute *attribute;
4322 struct kmem_cache *s;
4323 int err;
4324
4325 attribute = to_slab_attr(attr);
4326 s = to_slab(kobj);
4327
4328 if (!attribute->store)
4329 return -EIO;
4330
4331 err = attribute->store(s, buf, len);
4332
4333 return err;
4334}
4335
/* kobject release callback: free the kmem_cache embedding @kobj. */
static void kmem_cache_release(struct kobject *kobj)
{
	struct kmem_cache *cache;

	cache = to_slab(kobj);
	kfree(cache);
}
4342
/* sysfs operations shared by all slab cache kobjects. */
static struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};
4347
/*
 * kobject type of slab caches: attribute access goes through
 * slab_sysfs_ops, and kmem_cache_release() is the release callback.
 */
static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release
};
4352
4353static int uevent_filter(struct kset *kset, struct kobject *kobj)
4354{
4355 struct kobj_type *ktype = get_ktype(kobj);
4356
4357 if (ktype == &slab_ktype)
4358 return 1;
4359 return 0;
4360}
4361
/* uevent operations of the slab kset; only the filter is provided. */
static struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

/* The "slab" kset; created by slab_sysfs_init(). */
static struct kset *slab_kset;
4367
4368#define ID_STR_LENGTH 64
4369
4370
4371
4372
4373
4374static char *create_unique_id(struct kmem_cache *s)
4375{
4376 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
4377 char *p = name;
4378
4379 BUG_ON(!name);
4380
4381 *p++ = ':';
4382
4383
4384
4385
4386
4387
4388
4389 if (s->flags & SLAB_CACHE_DMA)
4390 *p++ = 'd';
4391 if (s->flags & SLAB_RECLAIM_ACCOUNT)
4392 *p++ = 'a';
4393 if (s->flags & SLAB_DEBUG_FREE)
4394 *p++ = 'F';
4395 if (p != name + 1)
4396 *p++ = '-';
4397 p += sprintf(p, "%07d", s->size);
4398 BUG_ON(p > name + ID_STR_LENGTH - 1);
4399 return name;
4400}
4401
4402static int sysfs_slab_add(struct kmem_cache *s)
4403{
4404 int err;
4405 const char *name;
4406 int unmergeable;
4407
4408 if (slab_state < SYSFS)
4409
4410 return 0;
4411
4412 unmergeable = slab_unmergeable(s);
4413 if (unmergeable) {
4414
4415
4416
4417
4418
4419 sysfs_remove_link(&slab_kset->kobj, s->name);
4420 name = s->name;
4421 } else {
4422
4423
4424
4425
4426 name = create_unique_id(s);
4427 }
4428
4429 s->kobj.kset = slab_kset;
4430 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
4431 if (err) {
4432 kobject_put(&s->kobj);
4433 return err;
4434 }
4435
4436 err = sysfs_create_group(&s->kobj, &slab_attr_group);
4437 if (err)
4438 return err;
4439 kobject_uevent(&s->kobj, KOBJ_ADD);
4440 if (!unmergeable) {
4441
4442 sysfs_slab_alias(s, s->name);
4443 kfree(name);
4444 }
4445 return 0;
4446}
4447
4448static void sysfs_slab_remove(struct kmem_cache *s)
4449{
4450 kobject_uevent(&s->kobj, KOBJ_REMOVE);
4451 kobject_del(&s->kobj);
4452 kobject_put(&s->kobj);
4453}
4454
4455
4456
4457
4458
/*
 * Alias requests recorded by sysfs_slab_alias() while slab_state is not yet
 * SYSFS. Entries are pushed on the singly linked alias_list and consumed by
 * slab_sysfs_init().
 */
struct saved_alias {
	struct kmem_cache *s;		/* cache the alias refers to */
	const char *name;		/* alias name; the pointer is stored, not copied */
	struct saved_alias *next;	/* next pending entry */
};

/* Head of the pending alias list; empty (NULL) by default. */
static struct saved_alias *alias_list;
4466
4467static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
4468{
4469 struct saved_alias *al;
4470
4471 if (slab_state == SYSFS) {
4472
4473
4474
4475 sysfs_remove_link(&slab_kset->kobj, name);
4476 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
4477 }
4478
4479 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
4480 if (!al)
4481 return -ENOMEM;
4482
4483 al->s = s;
4484 al->name = name;
4485 al->next = alias_list;
4486 alias_list = al;
4487 return 0;
4488}
4489
4490static int __init slab_sysfs_init(void)
4491{
4492 struct kmem_cache *s;
4493 int err;
4494
4495 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
4496 if (!slab_kset) {
4497 printk(KERN_ERR "Cannot register slab subsystem.\n");
4498 return -ENOSYS;
4499 }
4500
4501 slab_state = SYSFS;
4502
4503 list_for_each_entry(s, &slab_caches, list) {
4504 err = sysfs_slab_add(s);
4505 if (err)
4506 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
4507 " to sysfs\n", s->name);
4508 }
4509
4510 while (alias_list) {
4511 struct saved_alias *al = alias_list;
4512
4513 alias_list = alias_list->next;
4514 err = sysfs_slab_alias(al->s, al->name);
4515 if (err)
4516 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
4517 " %s to sysfs\n", s->name);
4518 kfree(al);
4519 }
4520
4521 resiliency_test();
4522 return 0;
4523}
4524
4525__initcall(slab_sysfs_init);
4526#endif
4527
4528
4529
4530
4531#ifdef CONFIG_SLABINFO
/*
 * Write the /proc/slabinfo header to @m: the version line, then one line
 * naming the columns, terminated by a newline.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	static const char * const pieces[] = {
		"slabinfo - version: 2.1\n",
		"# name <active_objs> <num_objs> <objsize> "
			"<objperslab> <pagesperslab>",
		" : tunables <limit> <batchcount> <sharedfactor>",
		" : slabdata <active_slabs> <num_slabs> <sharedavail>",
	};
	unsigned int i;

	for (i = 0; i < sizeof(pieces) / sizeof(pieces[0]); i++)
		seq_puts(m, pieces[i]);
	seq_putc(m, '\n');
}
4541
4542static void *s_start(struct seq_file *m, loff_t *pos)
4543{
4544 loff_t n = *pos;
4545
4546 down_read(&slub_lock);
4547 if (!n)
4548 print_slabinfo_header(m);
4549
4550 return seq_list_start(&slab_caches, *pos);
4551}
4552
4553static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4554{
4555 return seq_list_next(p, &slab_caches, pos);
4556}
4557
4558static void s_stop(struct seq_file *m, void *p)
4559{
4560 up_read(&slub_lock);
4561}
4562
4563static int s_show(struct seq_file *m, void *p)
4564{
4565 unsigned long nr_partials = 0;
4566 unsigned long nr_slabs = 0;
4567 unsigned long nr_inuse = 0;
4568 unsigned long nr_objs = 0;
4569 unsigned long nr_free = 0;
4570 struct kmem_cache *s;
4571 int node;
4572
4573 s = list_entry(p, struct kmem_cache, list);
4574
4575 for_each_online_node(node) {
4576 struct kmem_cache_node *n = get_node(s, node);
4577
4578 if (!n)
4579 continue;
4580
4581 nr_partials += n->nr_partial;
4582 nr_slabs += atomic_long_read(&n->nr_slabs);
4583 nr_objs += atomic_long_read(&n->total_objects);
4584 nr_free += count_partial(n, count_free);
4585 }
4586
4587 nr_inuse = nr_objs - nr_free;
4588
4589 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
4590 nr_objs, s->size, oo_objects(s->oo),
4591 (1 << oo_order(s->oo)));
4592 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
4593 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
4594 0UL);
4595 seq_putc(m, '\n');
4596 return 0;
4597}
4598
4599static const struct seq_operations slabinfo_op = {
4600 .start = s_start,
4601 .next = s_next,
4602 .stop = s_stop,
4603 .show = s_show,
4604};
4605
4606static int slabinfo_open(struct inode *inode, struct file *file)
4607{
4608 return seq_open(file, &slabinfo_op);
4609}
4610
4611static const struct file_operations proc_slabinfo_operations = {
4612 .open = slabinfo_open,
4613 .read = seq_read,
4614 .llseek = seq_lseek,
4615 .release = seq_release,
4616};
4617
4618static int __init slab_proc_init(void)
4619{
4620 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4621 return 0;
4622}
4623module_init(slab_proc_init);
4624#endif
4625