1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include "slab.h"
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/kmemcheck.h>
23#include <linux/cpu.h>
24#include <linux/cpuset.h>
25#include <linux/mempolicy.h>
26#include <linux/ctype.h>
27#include <linux/debugobjects.h>
28#include <linux/kallsyms.h>
29#include <linux/memory.h>
30#include <linux/math64.h>
31#include <linux/fault-inject.h>
32#include <linux/stacktrace.h>
33#include <linux/prefetch.h>
34
35#include <trace/events/kmem.h>
36
37#include "internal.h"
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Cache flags that count as "debugging is active" for a cache; this is the
 * mask tested by kmem_cache_debug().
 */
115#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
116 SLAB_TRACE | SLAB_DEBUG_FREE)
117
/*
 * Tell whether any of the SLAB_DEBUG_FLAGS is set on cache @s.
 *
 * Returns non-zero when debugging is active for the cache.  Kernels built
 * without CONFIG_SLUB_DEBUG always get 0.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#endif
}
126
127
128
129
130
131
132
133
134
135
/*
 * Build-time switch, explicitly left undefined here.
 * NOTE(review): its users are outside this chunk; presumably enables a
 * self-test of the debug/recovery paths - confirm.
 */
136#undef SLUB_RESILIENCY_TEST
137
138
/*
 * Build-time switch, left undefined.  When defined, the cmpxchg_double_slab()
 * helpers print a message every time the update has to be redone.
 */
139#undef SLUB_DEBUG_CMPXCHG
140
141
142
143
144
/*
 * NOTE(review): not used in the visible part of the file; presumably the
 * lower bound on partial slabs kept per node - confirm against users.
 */
145#define MIN_PARTIAL 5
146
147
148
149
150
151
/*
 * NOTE(review): not used in the visible part of the file; presumably the
 * upper bound matching MIN_PARTIAL - confirm against users.
 */
152#define MAX_PARTIAL 10
153
/*
 * Debug flags applied when debugging is requested without an explicit
 * selection: initial value of slub_debug under CONFIG_SLUB_DEBUG_ON and the
 * starting point in setup_slub_debug().
 */
154#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
155 SLAB_POISON | SLAB_STORE_USER)
156
157
158
159
160
161
/*
 * NOTE(review): not used in the visible part of the file; presumably the
 * debug flags that enlarge objects with extra metadata - confirm.
 */
162#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
163
164
165
166
/*
 * NOTE(review): not used in the visible part of the file; by its name, a
 * cache carrying any of these flags is excluded from cache merging - confirm.
 */
167#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
168 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
169 SLAB_FAILSLAB)
170
/*
 * NOTE(review): not used in the visible part of the file; by its name, the
 * flags that must be equal for two caches to be merged - confirm.
 */
171#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
172 SLAB_CACHE_DMA | SLAB_NOTRACK)
173
/*
 * Encoding of struct kmem_cache_order_objects: the page order lives above
 * OO_SHIFT, the object count in the low OO_MASK bits (see oo_make(),
 * oo_order() and oo_objects()).
 */
174#define OO_SHIFT 16
175#define OO_MASK ((1 << OO_SHIFT) - 1)
/* Cap on the per-slab object count enforced by on_freelist(). */
176#define MAX_OBJS_PER_PAGE 32767
177
178
/* Flag bits kept in kmem_cache->flags alongside the SLAB_* flags. */
/* Object payload is poisoned by init_object() and verified by check_object(). */
179#define __OBJECT_POISON 0x80000000UL
/* Cache updates page->freelist/counters with cmpxchg_double() instead of the slab lock. */
180#define __CMPXCHG_DOUBLE 0x40000000UL
181
/*
 * Starts out as the full size of struct kmem_cache.
 * NOTE(review): writers/readers are outside this chunk - confirm usage.
 */
182static int kmem_size = sizeof(struct kmem_cache);
183
/*
 * Notifier block that only exists on SMP builds.
 * NOTE(review): registration and callback are outside this chunk.
 */
184#ifdef CONFIG_SMP
185static struct notifier_block slab_notifier;
186#endif
187
188
189
190
/*
 * Per-object record of who allocated or freed it, filled in by set_track()
 * and printed by print_track().  Two of these (indexed by enum track_item)
 * are stored with every object of a SLAB_STORE_USER cache.
 */
/* Number of stack-trace entries saved per record. */
191#define TRACK_ADDRS_COUNT 16
192struct track {
193 unsigned long addr;	/* Caller address; 0 means "no record" */
194#ifdef CONFIG_STACKTRACE
195 unsigned long addrs[TRACK_ADDRS_COUNT];	/* Saved stack trace, zero-terminated when shorter */
196#endif
197 int cpu;		/* CPU the event happened on (smp_processor_id()) */
198 int pid;		/* current->pid at the time of the event */
199 unsigned long when;	/* jiffies at the time of the event */
200};
201
/* Index of a struct track inside an object's tracking area (see get_track()). */
202enum track_item { TRACK_ALLOC, TRACK_FREE };
203
/*
 * sysfs hooks.  With CONFIG_SYSFS they are only declared here (definitions
 * are outside this chunk); without it they collapse into no-op stubs that
 * report success.
 */
204#ifdef CONFIG_SYSFS
205static int sysfs_slab_add(struct kmem_cache *);
206static int sysfs_slab_alias(struct kmem_cache *, const char *);
207static void sysfs_slab_remove(struct kmem_cache *);
208
209#else
210static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
211static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
212 { return 0; }
213static inline void sysfs_slab_remove(struct kmem_cache *s) { }
214
215#endif
216
217static inline void stat(const struct kmem_cache *s, enum stat_item si)
218{
219#ifdef CONFIG_SLUB_STATS
220 __this_cpu_inc(s->cpu_slab->stat[si]);
221#endif
222}
223
224
225
226
227
228static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
229{
230 return s->node[node];
231}
232
233
234static inline int check_valid_pointer(struct kmem_cache *s,
235 struct page *page, const void *object)
236{
237 void *base;
238
239 if (!object)
240 return 1;
241
242 base = page_address(page);
243 if (object < base || object >= base + page->objects * s->size ||
244 (object - base) % s->size) {
245 return 0;
246 }
247
248 return 1;
249}
250
251static inline void *get_freepointer(struct kmem_cache *s, void *object)
252{
253 return *(void **)(object + s->offset);
254}
255
256static void prefetch_freepointer(const struct kmem_cache *s, void *object)
257{
258 prefetch(object + s->offset);
259}
260
/*
 * Like get_freepointer(), but under CONFIG_DEBUG_PAGEALLOC the pointer is
 * fetched through probe_kernel_read() instead of a plain dereference.  The
 * result of probe_kernel_read() is not checked.
 */
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	void *p;

	probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
	return p;
#else
	return get_freepointer(s, object);
#endif
}
272
273static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
274{
275 *(void **)(object + s->offset) = fp;
276}
277
278
/*
 * Iterate @__p over the @__objects objects of size (__s)->size that start at
 * @__addr.  Note that @__addr, @__objects and @__s are evaluated on every
 * iteration, so pass expressions without side effects.
 */
279#define for_each_object(__p, __s, __addr, __objects) \
280 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
281 __p += (__s)->size)
282
283
284static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
285{
286 return (p - addr) / s->size;
287}
288
289static inline size_t slab_ksize(const struct kmem_cache *s)
290{
291#ifdef CONFIG_SLUB_DEBUG
292
293
294
295
296 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
297 return s->object_size;
298
299#endif
300
301
302
303
304
305 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
306 return s->inuse;
307
308
309
310 return s->size;
311}
312
313static inline int order_objects(int order, unsigned long size, int reserved)
314{
315 return ((PAGE_SIZE << order) - reserved) / size;
316}
317
318static inline struct kmem_cache_order_objects oo_make(int order,
319 unsigned long size, int reserved)
320{
321 struct kmem_cache_order_objects x = {
322 (order << OO_SHIFT) + order_objects(order, size, reserved)
323 };
324
325 return x;
326}
327
328static inline int oo_order(struct kmem_cache_order_objects x)
329{
330 return x.x >> OO_SHIFT;
331}
332
333static inline int oo_objects(struct kmem_cache_order_objects x)
334{
335 return x.x & OO_MASK;
336}
337
338
339
340
341static __always_inline void slab_lock(struct page *page)
342{
343 bit_spin_lock(PG_locked, &page->flags);
344}
345
346static __always_inline void slab_unlock(struct page *page)
347{
348 __bit_spin_unlock(PG_locked, &page->flags);
349}
350
351
352static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
353 void *freelist_old, unsigned long counters_old,
354 void *freelist_new, unsigned long counters_new,
355 const char *n)
356{
357 VM_BUG_ON(!irqs_disabled());
358#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
359 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
360 if (s->flags & __CMPXCHG_DOUBLE) {
361 if (cmpxchg_double(&page->freelist, &page->counters,
362 freelist_old, counters_old,
363 freelist_new, counters_new))
364 return 1;
365 } else
366#endif
367 {
368 slab_lock(page);
369 if (page->freelist == freelist_old && page->counters == counters_old) {
370 page->freelist = freelist_new;
371 page->counters = counters_new;
372 slab_unlock(page);
373 return 1;
374 }
375 slab_unlock(page);
376 }
377
378 cpu_relax();
379 stat(s, CMPXCHG_DOUBLE_FAIL);
380
381#ifdef SLUB_DEBUG_CMPXCHG
382 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
383#endif
384
385 return 0;
386}
387
388static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
389 void *freelist_old, unsigned long counters_old,
390 void *freelist_new, unsigned long counters_new,
391 const char *n)
392{
393#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
394 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
395 if (s->flags & __CMPXCHG_DOUBLE) {
396 if (cmpxchg_double(&page->freelist, &page->counters,
397 freelist_old, counters_old,
398 freelist_new, counters_new))
399 return 1;
400 } else
401#endif
402 {
403 unsigned long flags;
404
405 local_irq_save(flags);
406 slab_lock(page);
407 if (page->freelist == freelist_old && page->counters == counters_old) {
408 page->freelist = freelist_new;
409 page->counters = counters_new;
410 slab_unlock(page);
411 local_irq_restore(flags);
412 return 1;
413 }
414 slab_unlock(page);
415 local_irq_restore(flags);
416 }
417
418 cpu_relax();
419 stat(s, CMPXCHG_DOUBLE_FAIL);
420
421#ifdef SLUB_DEBUG_CMPXCHG
422 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
423#endif
424
425 return 0;
426}
427
428#ifdef CONFIG_SLUB_DEBUG
429
430
431
432
433
434
435static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
436{
437 void *p;
438 void *addr = page_address(page);
439
440 for (p = page->freelist; p; p = get_freepointer(s, p))
441 set_bit(slab_index(p, s, addr), map);
442}
443
444
445
446
/*
 * Debug flags OR-ed into newly created caches by kmem_cache_flags().
 * Defaults to DEBUG_DEFAULT_FLAGS when CONFIG_SLUB_DEBUG_ON is set, 0
 * otherwise; overridden by the "slub_debug" boot parameter.
 */
447#ifdef CONFIG_SLUB_DEBUG_ON
448static int slub_debug = DEBUG_DEFAULT_FLAGS;
449#else
450static int slub_debug;
451#endif
452
/* Cache-name prefix from "slub_debug=...,<name>"; NULL means all caches. */
453static char *slub_debug_slabs;
/* Set by "slub_debug=O"; consumers are outside this chunk. */
454static int disable_higher_order_debug;
455
456
457
458
459static void print_section(char *text, u8 *addr, unsigned int length)
460{
461 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
462 length, 1);
463}
464
465static struct track *get_track(struct kmem_cache *s, void *object,
466 enum track_item alloc)
467{
468 struct track *p;
469
470 if (s->offset)
471 p = object + s->offset + sizeof(void *);
472 else
473 p = object + s->inuse;
474
475 return p + alloc;
476}
477
478static void set_track(struct kmem_cache *s, void *object,
479 enum track_item alloc, unsigned long addr)
480{
481 struct track *p = get_track(s, object, alloc);
482
483 if (addr) {
484#ifdef CONFIG_STACKTRACE
485 struct stack_trace trace;
486 int i;
487
488 trace.nr_entries = 0;
489 trace.max_entries = TRACK_ADDRS_COUNT;
490 trace.entries = p->addrs;
491 trace.skip = 3;
492 save_stack_trace(&trace);
493
494
495 if (trace.nr_entries != 0 &&
496 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
497 trace.nr_entries--;
498
499 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
500 p->addrs[i] = 0;
501#endif
502 p->addr = addr;
503 p->cpu = smp_processor_id();
504 p->pid = current->pid;
505 p->when = jiffies;
506 } else
507 memset(p, 0, sizeof(struct track));
508}
509
510static void init_tracking(struct kmem_cache *s, void *object)
511{
512 if (!(s->flags & SLAB_STORE_USER))
513 return;
514
515 set_track(s, object, TRACK_FREE, 0UL);
516 set_track(s, object, TRACK_ALLOC, 0UL);
517}
518
519static void print_track(const char *s, struct track *t)
520{
521 if (!t->addr)
522 return;
523
524 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
525 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
526#ifdef CONFIG_STACKTRACE
527 {
528 int i;
529 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
530 if (t->addrs[i])
531 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
532 else
533 break;
534 }
535#endif
536}
537
538static void print_tracking(struct kmem_cache *s, void *object)
539{
540 if (!(s->flags & SLAB_STORE_USER))
541 return;
542
543 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
544 print_track("Freed", get_track(s, object, TRACK_FREE));
545}
546
547static void print_page_info(struct page *page)
548{
549 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
550 page, page->objects, page->inuse, page->freelist, page->flags);
551
552}
553
554static void slab_bug(struct kmem_cache *s, char *fmt, ...)
555{
556 va_list args;
557 char buf[100];
558
559 va_start(args, fmt);
560 vsnprintf(buf, sizeof(buf), fmt, args);
561 va_end(args);
562 printk(KERN_ERR "========================================"
563 "=====================================\n");
564 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
565 printk(KERN_ERR "----------------------------------------"
566 "-------------------------------------\n\n");
567
568 add_taint(TAINT_BAD_PAGE);
569}
570
571static void slab_fix(struct kmem_cache *s, char *fmt, ...)
572{
573 va_list args;
574 char buf[100];
575
576 va_start(args, fmt);
577 vsnprintf(buf, sizeof(buf), fmt, args);
578 va_end(args);
579 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
580}
581
582static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
583{
584 unsigned int off;
585 u8 *addr = page_address(page);
586
587 print_tracking(s, p);
588
589 print_page_info(page);
590
591 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
592 p, p - addr, get_freepointer(s, p));
593
594 if (p > addr + 16)
595 print_section("Bytes b4 ", p - 16, 16);
596
597 print_section("Object ", p, min_t(unsigned long, s->object_size,
598 PAGE_SIZE));
599 if (s->flags & SLAB_RED_ZONE)
600 print_section("Redzone ", p + s->object_size,
601 s->inuse - s->object_size);
602
603 if (s->offset)
604 off = s->offset + sizeof(void *);
605 else
606 off = s->inuse;
607
608 if (s->flags & SLAB_STORE_USER)
609 off += 2 * sizeof(struct track);
610
611 if (off != s->size)
612
613 print_section("Padding ", p + off, s->size - off);
614
615 dump_stack();
616}
617
618static void object_err(struct kmem_cache *s, struct page *page,
619 u8 *object, char *reason)
620{
621 slab_bug(s, "%s", reason);
622 print_trailer(s, page, object);
623}
624
625static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
626{
627 va_list args;
628 char buf[100];
629
630 va_start(args, fmt);
631 vsnprintf(buf, sizeof(buf), fmt, args);
632 va_end(args);
633 slab_bug(s, "%s", buf);
634 print_page_info(page);
635 dump_stack();
636}
637
638static void init_object(struct kmem_cache *s, void *object, u8 val)
639{
640 u8 *p = object;
641
642 if (s->flags & __OBJECT_POISON) {
643 memset(p, POISON_FREE, s->object_size - 1);
644 p[s->object_size - 1] = POISON_END;
645 }
646
647 if (s->flags & SLAB_RED_ZONE)
648 memset(p + s->object_size, val, s->inuse - s->object_size);
649}
650
651static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
652 void *from, void *to)
653{
654 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
655 memset(from, data, to - from);
656}
657
658static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
659 u8 *object, char *what,
660 u8 *start, unsigned int value, unsigned int bytes)
661{
662 u8 *fault;
663 u8 *end;
664
665 fault = memchr_inv(start, value, bytes);
666 if (!fault)
667 return 1;
668
669 end = start + bytes;
670 while (end > fault && end[-1] == value)
671 end--;
672
673 slab_bug(s, "%s overwritten", what);
674 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
675 fault, end - 1, fault[0], value);
676 print_trailer(s, page, object);
677
678 restore_bytes(s, what, value, fault, end);
679 return 0;
680}
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
721{
722 unsigned long off = s->inuse;
723
724 if (s->offset)
725
726 off += sizeof(void *);
727
728 if (s->flags & SLAB_STORE_USER)
729
730 off += 2 * sizeof(struct track);
731
732 if (s->size == off)
733 return 1;
734
735 return check_bytes_and_report(s, page, p, "Object padding",
736 p + off, POISON_INUSE, s->size - off);
737}
738
739
740static int slab_pad_check(struct kmem_cache *s, struct page *page)
741{
742 u8 *start;
743 u8 *fault;
744 u8 *end;
745 int length;
746 int remainder;
747
748 if (!(s->flags & SLAB_POISON))
749 return 1;
750
751 start = page_address(page);
752 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
753 end = start + length;
754 remainder = length % s->size;
755 if (!remainder)
756 return 1;
757
758 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
759 if (!fault)
760 return 1;
761 while (end > fault && end[-1] == POISON_INUSE)
762 end--;
763
764 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
765 print_section("Padding ", end - remainder, remainder);
766
767 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
768 return 0;
769}
770
771static int check_object(struct kmem_cache *s, struct page *page,
772 void *object, u8 val)
773{
774 u8 *p = object;
775 u8 *endobject = object + s->object_size;
776
777 if (s->flags & SLAB_RED_ZONE) {
778 if (!check_bytes_and_report(s, page, object, "Redzone",
779 endobject, val, s->inuse - s->object_size))
780 return 0;
781 } else {
782 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
783 check_bytes_and_report(s, page, p, "Alignment padding",
784 endobject, POISON_INUSE, s->inuse - s->object_size);
785 }
786 }
787
788 if (s->flags & SLAB_POISON) {
789 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
790 (!check_bytes_and_report(s, page, p, "Poison", p,
791 POISON_FREE, s->object_size - 1) ||
792 !check_bytes_and_report(s, page, p, "Poison",
793 p + s->object_size - 1, POISON_END, 1)))
794 return 0;
795
796
797
798 check_pad_bytes(s, page, p);
799 }
800
801 if (!s->offset && val == SLUB_RED_ACTIVE)
802
803
804
805
806 return 1;
807
808
809 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
810 object_err(s, page, p, "Freepointer corrupt");
811
812
813
814
815
816 set_freepointer(s, p, NULL);
817 return 0;
818 }
819 return 1;
820}
821
822static int check_slab(struct kmem_cache *s, struct page *page)
823{
824 int maxobj;
825
826 VM_BUG_ON(!irqs_disabled());
827
828 if (!PageSlab(page)) {
829 slab_err(s, page, "Not a valid slab page");
830 return 0;
831 }
832
833 maxobj = order_objects(compound_order(page), s->size, s->reserved);
834 if (page->objects > maxobj) {
835 slab_err(s, page, "objects %u > max %u",
836 s->name, page->objects, maxobj);
837 return 0;
838 }
839 if (page->inuse > page->objects) {
840 slab_err(s, page, "inuse %u > max %u",
841 s->name, page->inuse, page->objects);
842 return 0;
843 }
844
845 slab_pad_check(s, page);
846 return 1;
847}
848
849
850
851
852
853static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
854{
855 int nr = 0;
856 void *fp;
857 void *object = NULL;
858 unsigned long max_objects;
859
860 fp = page->freelist;
861 while (fp && nr <= page->objects) {
862 if (fp == search)
863 return 1;
864 if (!check_valid_pointer(s, page, fp)) {
865 if (object) {
866 object_err(s, page, object,
867 "Freechain corrupt");
868 set_freepointer(s, object, NULL);
869 break;
870 } else {
871 slab_err(s, page, "Freepointer corrupt");
872 page->freelist = NULL;
873 page->inuse = page->objects;
874 slab_fix(s, "Freelist cleared");
875 return 0;
876 }
877 break;
878 }
879 object = fp;
880 fp = get_freepointer(s, object);
881 nr++;
882 }
883
884 max_objects = order_objects(compound_order(page), s->size, s->reserved);
885 if (max_objects > MAX_OBJS_PER_PAGE)
886 max_objects = MAX_OBJS_PER_PAGE;
887
888 if (page->objects != max_objects) {
889 slab_err(s, page, "Wrong number of objects. Found %d but "
890 "should be %d", page->objects, max_objects);
891 page->objects = max_objects;
892 slab_fix(s, "Number of objects adjusted.");
893 }
894 if (page->inuse != page->objects - nr) {
895 slab_err(s, page, "Wrong object count. Counter is %d but "
896 "counted were %d", page->inuse, page->objects - nr);
897 page->inuse = page->objects - nr;
898 slab_fix(s, "Object count adjusted.");
899 }
900 return search == NULL;
901}
902
903static void trace(struct kmem_cache *s, struct page *page, void *object,
904 int alloc)
905{
906 if (s->flags & SLAB_TRACE) {
907 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
908 s->name,
909 alloc ? "alloc" : "free",
910 object, page->inuse,
911 page->freelist);
912
913 if (!alloc)
914 print_section("Object ", (void *)object, s->object_size);
915
916 dump_stack();
917 }
918}
919
920
921
922
923
924static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
925{
926 flags &= gfp_allowed_mask;
927 lockdep_trace_alloc(flags);
928 might_sleep_if(flags & __GFP_WAIT);
929
930 return should_failslab(s->object_size, flags, s->flags);
931}
932
933static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
934{
935 flags &= gfp_allowed_mask;
936 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
937 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
938}
939
940static inline void slab_free_hook(struct kmem_cache *s, void *x)
941{
942 kmemleak_free_recursive(x, s->flags);
943
944
945
946
947
948
949#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
950 {
951 unsigned long flags;
952
953 local_irq_save(flags);
954 kmemcheck_slab_free(s, x, s->object_size);
955 debug_check_no_locks_freed(x, s->object_size);
956 local_irq_restore(flags);
957 }
958#endif
959 if (!(s->flags & SLAB_DEBUG_OBJECTS))
960 debug_check_no_obj_freed(x, s->object_size);
961}
962
963
964
965
966
967
968static void add_full(struct kmem_cache *s,
969 struct kmem_cache_node *n, struct page *page)
970{
971 if (!(s->flags & SLAB_STORE_USER))
972 return;
973
974 list_add(&page->lru, &n->full);
975}
976
977
978
979
980static void remove_full(struct kmem_cache *s, struct page *page)
981{
982 if (!(s->flags & SLAB_STORE_USER))
983 return;
984
985 list_del(&page->lru);
986}
987
988
989static inline unsigned long slabs_node(struct kmem_cache *s, int node)
990{
991 struct kmem_cache_node *n = get_node(s, node);
992
993 return atomic_long_read(&n->nr_slabs);
994}
995
996static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
997{
998 return atomic_long_read(&n->nr_slabs);
999}
1000
1001static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1002{
1003 struct kmem_cache_node *n = get_node(s, node);
1004
1005
1006
1007
1008
1009
1010
1011 if (n) {
1012 atomic_long_inc(&n->nr_slabs);
1013 atomic_long_add(objects, &n->total_objects);
1014 }
1015}
1016static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1017{
1018 struct kmem_cache_node *n = get_node(s, node);
1019
1020 atomic_long_dec(&n->nr_slabs);
1021 atomic_long_sub(objects, &n->total_objects);
1022}
1023
1024
1025static void setup_object_debug(struct kmem_cache *s, struct page *page,
1026 void *object)
1027{
1028 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1029 return;
1030
1031 init_object(s, object, SLUB_RED_INACTIVE);
1032 init_tracking(s, object);
1033}
1034
1035static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1036 void *object, unsigned long addr)
1037{
1038 if (!check_slab(s, page))
1039 goto bad;
1040
1041 if (!check_valid_pointer(s, page, object)) {
1042 object_err(s, page, object, "Freelist Pointer check fails");
1043 goto bad;
1044 }
1045
1046 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1047 goto bad;
1048
1049
1050 if (s->flags & SLAB_STORE_USER)
1051 set_track(s, object, TRACK_ALLOC, addr);
1052 trace(s, page, object, 1);
1053 init_object(s, object, SLUB_RED_ACTIVE);
1054 return 1;
1055
1056bad:
1057 if (PageSlab(page)) {
1058
1059
1060
1061
1062
1063 slab_fix(s, "Marking all objects used");
1064 page->inuse = page->objects;
1065 page->freelist = NULL;
1066 }
1067 return 0;
1068}
1069
1070static noinline struct kmem_cache_node *free_debug_processing(
1071 struct kmem_cache *s, struct page *page, void *object,
1072 unsigned long addr, unsigned long *flags)
1073{
1074 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1075
1076 spin_lock_irqsave(&n->list_lock, *flags);
1077 slab_lock(page);
1078
1079 if (!check_slab(s, page))
1080 goto fail;
1081
1082 if (!check_valid_pointer(s, page, object)) {
1083 slab_err(s, page, "Invalid object pointer 0x%p", object);
1084 goto fail;
1085 }
1086
1087 if (on_freelist(s, page, object)) {
1088 object_err(s, page, object, "Object already free");
1089 goto fail;
1090 }
1091
1092 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1093 goto out;
1094
1095 if (unlikely(s != page->slab)) {
1096 if (!PageSlab(page)) {
1097 slab_err(s, page, "Attempt to free object(0x%p) "
1098 "outside of slab", object);
1099 } else if (!page->slab) {
1100 printk(KERN_ERR
1101 "SLUB <none>: no slab for object 0x%p.\n",
1102 object);
1103 dump_stack();
1104 } else
1105 object_err(s, page, object,
1106 "page slab pointer corrupt.");
1107 goto fail;
1108 }
1109
1110 if (s->flags & SLAB_STORE_USER)
1111 set_track(s, object, TRACK_FREE, addr);
1112 trace(s, page, object, 0);
1113 init_object(s, object, SLUB_RED_INACTIVE);
1114out:
1115 slab_unlock(page);
1116
1117
1118
1119
1120 return n;
1121
1122fail:
1123 slab_unlock(page);
1124 spin_unlock_irqrestore(&n->list_lock, *flags);
1125 slab_fix(s, "Object at 0x%p not freed", object);
1126 return NULL;
1127}
1128
1129static int __init setup_slub_debug(char *str)
1130{
1131 slub_debug = DEBUG_DEFAULT_FLAGS;
1132 if (*str++ != '=' || !*str)
1133
1134
1135
1136 goto out;
1137
1138 if (*str == ',')
1139
1140
1141
1142
1143 goto check_slabs;
1144
1145 if (tolower(*str) == 'o') {
1146
1147
1148
1149
1150 disable_higher_order_debug = 1;
1151 goto out;
1152 }
1153
1154 slub_debug = 0;
1155 if (*str == '-')
1156
1157
1158
1159 goto out;
1160
1161
1162
1163
1164 for (; *str && *str != ','; str++) {
1165 switch (tolower(*str)) {
1166 case 'f':
1167 slub_debug |= SLAB_DEBUG_FREE;
1168 break;
1169 case 'z':
1170 slub_debug |= SLAB_RED_ZONE;
1171 break;
1172 case 'p':
1173 slub_debug |= SLAB_POISON;
1174 break;
1175 case 'u':
1176 slub_debug |= SLAB_STORE_USER;
1177 break;
1178 case 't':
1179 slub_debug |= SLAB_TRACE;
1180 break;
1181 case 'a':
1182 slub_debug |= SLAB_FAILSLAB;
1183 break;
1184 default:
1185 printk(KERN_ERR "slub_debug option '%c' "
1186 "unknown. skipped\n", *str);
1187 }
1188 }
1189
1190check_slabs:
1191 if (*str == ',')
1192 slub_debug_slabs = str + 1;
1193out:
1194 return 1;
1195}
1196
1197__setup("slub_debug", setup_slub_debug);
1198
1199static unsigned long kmem_cache_flags(unsigned long object_size,
1200 unsigned long flags, const char *name,
1201 void (*ctor)(void *))
1202{
1203
1204
1205
1206 if (slub_debug && (!slub_debug_slabs ||
1207 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1208 flags |= slub_debug;
1209
1210 return flags;
1211}
1212#else
/*
 * Stubs used when CONFIG_SLUB_DEBUG is off: the debug hooks become no-ops,
 * the checks report success, the processing helpers report "nothing done"
 * (0 / NULL), and slab counting reports 0.
 */
1213static inline void setup_object_debug(struct kmem_cache *s,
1214 struct page *page, void *object) {}
1215
1216static inline int alloc_debug_processing(struct kmem_cache *s,
1217 struct page *page, void *object, unsigned long addr) { return 0; }
1218
1219static inline struct kmem_cache_node *free_debug_processing(
1220 struct kmem_cache *s, struct page *page, void *object,
1221 unsigned long addr, unsigned long *flags) { return NULL; }
1222
1223static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1224 { return 1; }
1225static inline int check_object(struct kmem_cache *s, struct page *page,
1226 void *object, u8 val) { return 1; }
1227static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1228 struct page *page) {}
1229static inline void remove_full(struct kmem_cache *s, struct page *page) {}
/* Without debugging the requested flags are passed through unchanged. */
1230static inline unsigned long kmem_cache_flags(unsigned long object_size,
1231 unsigned long flags, const char *name,
1232 void (*ctor)(void *))
1233{
1234 return flags;
1235}
/* Compile-time constants replacing the debug-build variables of the same name. */
1236#define slub_debug 0
1237
1238#define disable_higher_order_debug 0
1239
1240static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1241 { return 0; }
1242static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1243 { return 0; }
1244static inline void inc_slabs_node(struct kmem_cache *s, int node,
1245 int objects) {}
1246static inline void dec_slabs_node(struct kmem_cache *s, int node,
1247 int objects) {}
1248
/* Returning 0 here means "do not fail the allocation". */
1249static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1250 { return 0; }
1251
1252static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1253 void *object) {}
1254
1255static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1256
1257#endif
1258
1259
1260
1261
1262static inline struct page *alloc_slab_page(gfp_t flags, int node,
1263 struct kmem_cache_order_objects oo)
1264{
1265 int order = oo_order(oo);
1266
1267 flags |= __GFP_NOTRACK;
1268
1269 if (node == NUMA_NO_NODE)
1270 return alloc_pages(flags, order);
1271 else
1272 return alloc_pages_exact_node(node, flags, order);
1273}
1274
1275static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1276{
1277 struct page *page;
1278 struct kmem_cache_order_objects oo = s->oo;
1279 gfp_t alloc_gfp;
1280
1281 flags &= gfp_allowed_mask;
1282
1283 if (flags & __GFP_WAIT)
1284 local_irq_enable();
1285
1286 flags |= s->allocflags;
1287
1288
1289
1290
1291
1292 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1293
1294 page = alloc_slab_page(alloc_gfp, node, oo);
1295 if (unlikely(!page)) {
1296 oo = s->min;
1297
1298
1299
1300
1301 page = alloc_slab_page(flags, node, oo);
1302
1303 if (page)
1304 stat(s, ORDER_FALLBACK);
1305 }
1306
1307 if (kmemcheck_enabled && page
1308 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1309 int pages = 1 << oo_order(oo);
1310
1311 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1312
1313
1314
1315
1316
1317 if (s->ctor)
1318 kmemcheck_mark_uninitialized_pages(page, pages);
1319 else
1320 kmemcheck_mark_unallocated_pages(page, pages);
1321 }
1322
1323 if (flags & __GFP_WAIT)
1324 local_irq_disable();
1325 if (!page)
1326 return NULL;
1327
1328 page->objects = oo_objects(oo);
1329 mod_zone_page_state(page_zone(page),
1330 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1331 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1332 1 << oo_order(oo));
1333
1334 return page;
1335}
1336
1337static void setup_object(struct kmem_cache *s, struct page *page,
1338 void *object)
1339{
1340 setup_object_debug(s, page, object);
1341 if (unlikely(s->ctor))
1342 s->ctor(object);
1343}
1344
1345static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1346{
1347 struct page *page;
1348 void *start;
1349 void *last;
1350 void *p;
1351
1352 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1353
1354 page = allocate_slab(s,
1355 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1356 if (!page)
1357 goto out;
1358
1359 inc_slabs_node(s, page_to_nid(page), page->objects);
1360 page->slab = s;
1361 __SetPageSlab(page);
1362 if (page->pfmemalloc)
1363 SetPageSlabPfmemalloc(page);
1364
1365 start = page_address(page);
1366
1367 if (unlikely(s->flags & SLAB_POISON))
1368 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1369
1370 last = start;
1371 for_each_object(p, s, start, page->objects) {
1372 setup_object(s, page, last);
1373 set_freepointer(s, last, p);
1374 last = p;
1375 }
1376 setup_object(s, page, last);
1377 set_freepointer(s, last, NULL);
1378
1379 page->freelist = start;
1380 page->inuse = page->objects;
1381 page->frozen = 1;
1382out:
1383 return page;
1384}
1385
1386static void __free_slab(struct kmem_cache *s, struct page *page)
1387{
1388 int order = compound_order(page);
1389 int pages = 1 << order;
1390
1391 if (kmem_cache_debug(s)) {
1392 void *p;
1393
1394 slab_pad_check(s, page);
1395 for_each_object(p, s, page_address(page),
1396 page->objects)
1397 check_object(s, page, p, SLUB_RED_INACTIVE);
1398 }
1399
1400 kmemcheck_free_shadow(page, compound_order(page));
1401
1402 mod_zone_page_state(page_zone(page),
1403 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1404 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1405 -pages);
1406
1407 __ClearPageSlabPfmemalloc(page);
1408 __ClearPageSlab(page);
1409 reset_page_mapcount(page);
1410 if (current->reclaim_state)
1411 current->reclaim_state->reclaimed_slab += pages;
1412 __free_pages(page, order);
1413}
1414
/*
 * True when struct rcu_head does not fit into page->lru.  In that case
 * free_slab() places the rcu_head in the reserved area at the end of the
 * slab instead of overlaying it on page->lru.
 */
1415#define need_reserve_slab_rcu \
1416 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1417
1418static void rcu_free_slab(struct rcu_head *h)
1419{
1420 struct page *page;
1421
1422 if (need_reserve_slab_rcu)
1423 page = virt_to_head_page(h);
1424 else
1425 page = container_of((struct list_head *)h, struct page, lru);
1426
1427 __free_slab(page->slab, page);
1428}
1429
1430static void free_slab(struct kmem_cache *s, struct page *page)
1431{
1432 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1433 struct rcu_head *head;
1434
1435 if (need_reserve_slab_rcu) {
1436 int order = compound_order(page);
1437 int offset = (PAGE_SIZE << order) - s->reserved;
1438
1439 VM_BUG_ON(s->reserved != sizeof(*head));
1440 head = page_address(page) + offset;
1441 } else {
1442
1443
1444
1445 head = (void *)&page->lru;
1446 }
1447
1448 call_rcu(head, rcu_free_slab);
1449 } else
1450 __free_slab(s, page);
1451}
1452
1453static void discard_slab(struct kmem_cache *s, struct page *page)
1454{
1455 dec_slabs_node(s, page_to_nid(page), page->objects);
1456 free_slab(s, page);
1457}
1458
1459
1460
1461
1462
1463
1464static inline void add_partial(struct kmem_cache_node *n,
1465 struct page *page, int tail)
1466{
1467 n->nr_partial++;
1468 if (tail == DEACTIVATE_TO_TAIL)
1469 list_add_tail(&page->lru, &n->partial);
1470 else
1471 list_add(&page->lru, &n->partial);
1472}
1473
1474
1475
1476
/*
 * Unlink a slab from the node's partial list and decrement the partial
 * count.  No locking is done here; the caller is expected to serialize
 * access to the list (callers in this file hold n->list_lock).
 */
static inline void remove_partial(struct kmem_cache_node *n,
					struct page *page)
{
	list_del(&page->lru);
	n->nr_partial--;
}
1483
1484
1485
1486
1487
1488
1489
1490
1491
/*
 * Freeze a slab that sits on the node partial list and take it off that
 * list.
 *
 * @mode != 0: the slab is taken as a cpu slab; all objects are marked in
 *             use and page->freelist is cleared, the old freelist being
 *             handed to the caller.
 * @mode == 0: the slab is only frozen; its freelist is left in place.
 *
 * Returns the freelist that was on the page, or NULL if the
 * cmpxchg lost a race (the slab is then left untouched on the list).
 *
 * Modifies the partial list, so the caller must hold n->list_lock
 * (get_partial_node() does).
 */
static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
		int mode)
{
	void *freelist;
	unsigned long counters;
	struct page new;

	/*
	 * Snapshot freelist and counters, then build the new counters word
	 * in a scratch struct page so the bitfields can be edited by name.
	 */
	freelist = page->freelist;
	counters = page->counters;
	new.counters = counters;
	if (mode) {
		new.inuse = page->objects;
		new.freelist = NULL;
	} else {
		new.freelist = freelist;
	}

	/* A slab on the partial list must not already be frozen. */
	VM_BUG_ON(new.frozen);
	new.frozen = 1;

	if (!__cmpxchg_double_slab(s, page,
			freelist, counters,
			new.freelist, new.counters,
			"acquire_slab"))
		return NULL;

	remove_partial(n, page);
	/* A partial slab is expected to have at least one free object. */
	WARN_ON(!freelist);
	return freelist;
}
1528
/* Forward declarations: both are defined later and used by get_partial_node(). */
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1531
1532
1533
1534
1535static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1536 struct kmem_cache_cpu *c, gfp_t flags)
1537{
1538 struct page *page, *page2;
1539 void *object = NULL;
1540
1541
1542
1543
1544
1545
1546
1547 if (!n || !n->nr_partial)
1548 return NULL;
1549
1550 spin_lock(&n->list_lock);
1551 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1552 void *t;
1553 int available;
1554
1555 if (!pfmemalloc_match(page, flags))
1556 continue;
1557
1558 t = acquire_slab(s, n, page, object == NULL);
1559 if (!t)
1560 break;
1561
1562 if (!object) {
1563 c->page = page;
1564 stat(s, ALLOC_FROM_PARTIAL);
1565 object = t;
1566 available = page->objects - page->inuse;
1567 } else {
1568 available = put_cpu_partial(s, page, 0);
1569 stat(s, CPU_PARTIAL_NODE);
1570 }
1571 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1572 break;
1573
1574 }
1575 spin_unlock(&n->list_lock);
1576 return object;
1577}
1578
1579
1580
1581
1582static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1583 struct kmem_cache_cpu *c)
1584{
1585#ifdef CONFIG_NUMA
1586 struct zonelist *zonelist;
1587 struct zoneref *z;
1588 struct zone *zone;
1589 enum zone_type high_zoneidx = gfp_zone(flags);
1590 void *object;
1591 unsigned int cpuset_mems_cookie;
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611 if (!s->remote_node_defrag_ratio ||
1612 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1613 return NULL;
1614
1615 do {
1616 cpuset_mems_cookie = get_mems_allowed();
1617 zonelist = node_zonelist(slab_node(), flags);
1618 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1619 struct kmem_cache_node *n;
1620
1621 n = get_node(s, zone_to_nid(zone));
1622
1623 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1624 n->nr_partial > s->min_partial) {
1625 object = get_partial_node(s, n, c, flags);
1626 if (object) {
1627
1628
1629
1630
1631
1632
1633
1634
1635 put_mems_allowed(cpuset_mems_cookie);
1636 return object;
1637 }
1638 }
1639 }
1640 } while (!put_mems_allowed(cpuset_mems_cookie));
1641#endif
1642 return NULL;
1643}
1644
1645
1646
1647
1648static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1649 struct kmem_cache_cpu *c)
1650{
1651 void *object;
1652 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1653
1654 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1655 if (object || node != NUMA_NO_NODE)
1656 return object;
1657
1658 return get_any_partial(s, flags, c);
1659}
1660
/*
 * Transaction ids (tids).
 *
 * A tid starts out as the cpu number (init_tid) and advances by TID_STEP
 * on every operation (next_tid).  With CONFIG_PREEMPT the step is the
 * number of configured cpus rounded up to a power of two, so that
 * tid % TID_STEP keeps identifying the cpu and tid / TID_STEP counts
 * events.  Without preemption the step is simply 1.
 */
#ifdef CONFIG_PREEMPT
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
#define TID_STEP 1
#endif

/* Advance a tid by one operation. */
static inline unsigned long next_tid(unsigned long tid)
{
	unsigned long advanced = tid + TID_STEP;

	return advanced;
}

/* Extract the cpu component of a tid. */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned int cpu = tid % TID_STEP;

	return cpu;
}

/* Extract the event counter component of a tid. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event = tid / TID_STEP;

	return event;
}

/* Initial tid for @cpu: the cpu number itself. */
static inline unsigned int init_tid(int cpu)
{
	unsigned int first = cpu;

	return first;
}
1695
/*
 * Account for a failed per-cpu cmpxchg_double in the fast paths.
 *
 * @n:   name of the calling operation, used only in the debug message.
 * @s:   cache the operation was performed on.
 * @tid: transaction id the caller sampled before its attempt.
 *
 * The CMPXCHG_DOUBLE_CPU_FAIL statistic is always bumped.  When
 * SLUB_DEBUG_CMPXCHG is defined a message is printed as well, classifying
 * the failure by comparing @tid with the cpu's current tid.
 */
static inline void note_cmpxchg_failure(const char *n,
		const struct kmem_cache *s, unsigned long tid)
{
#ifdef SLUB_DEBUG_CMPXCHG
	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);

	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);

#ifdef CONFIG_PREEMPT
	/* The cpu part of the tid differs: the sampled tid is from another cpu. */
	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
		printk("due to cpu change %d -> %d\n",
			tid_to_cpu(tid), tid_to_cpu(actual_tid));
	else
#endif
	/* Same cpu but the event counter moved: something ran in between. */
	if (tid_to_event(tid) != tid_to_event(actual_tid))
		printk("due to cpu running other code. Event %ld->%ld\n",
			tid_to_event(tid), tid_to_event(actual_tid));
	else
		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
			actual_tid, tid, next_tid(tid));
#endif
	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
}
1719
1720static void init_kmem_cache_cpus(struct kmem_cache *s)
1721{
1722 int cpu;
1723
1724 for_each_possible_cpu(cpu)
1725 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1726}
1727
1728
1729
1730
/*
 * Detach a frozen slab from its cpu.
 *
 * The objects still on the per-cpu @freelist are returned to
 * page->freelist, the frozen bit is cleared, and the page is then put on
 * the node partial list, put on the full list (debug caches only), or
 * discarded when it holds no objects and the node already has more than
 * s->min_partial partial slabs.
 */
static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
{
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;
	enum slab_modes l = M_NONE, m = M_NONE;
	void *nextfree;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;

	/*
	 * Objects already sit on page->freelist: account that as
	 * DEACTIVATE_REMOTE_FREES and queue the slab at the tail of the
	 * partial list.
	 */
	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = DEACTIVATE_TO_TAIL;
	}

	/*
	 * Stage one: move every object of the per-cpu freelist except the
	 * last one (the one whose free pointer is NULL) onto page->freelist,
	 * one cmpxchg per object.  The page stays frozen throughout.  The
	 * last object is handled together with the unfreeze below.
	 */
	while (freelist && (nextfree = get_freepointer(s, freelist))) {
		void *prior;
		unsigned long counters;

		do {
			prior = page->freelist;
			counters = page->counters;
			set_freepointer(s, freelist, prior);
			new.counters = counters;
			new.inuse--;
			VM_BUG_ON(!new.frozen);

		} while (!__cmpxchg_double_slab(s, page,
			prior, counters,
			freelist, new.counters,
			"drain percpu freelist"));

		freelist = nextfree;
	}

	/*
	 * Stage two: unfreeze the page.  The target state (m) is computed
	 * from a snapshot, the node lists are adjusted to match it, and the
	 * new freelist/counters are installed with a cmpxchg.  If the
	 * cmpxchg fails everything is recomputed; l remembers which list the
	 * page was put on by the previous attempt so it can be moved.
	 */
redo:

	old.freelist = page->freelist;
	old.counters = page->counters;
	VM_BUG_ON(!old.frozen);

	/* Fold the last per-cpu object (if any) into the new freelist. */
	new.counters = old.counters;
	if (freelist) {
		new.inuse--;
		set_freepointer(s, freelist, old.freelist);
		new.freelist = freelist;
	} else
		new.freelist = old.freelist;

	new.frozen = 0;

	if (!new.inuse && n->nr_partial > s->min_partial)
		m = M_FREE;
	else if (new.freelist) {
		m = M_PARTIAL;
		if (!lock) {
			lock = 1;
			/*
			 * The partial list is about to be modified, so
			 * list_lock is taken (once) and held across retries.
			 */
			spin_lock(&n->list_lock);
		}
	} else {
		m = M_FULL;
		if (kmem_cache_debug(s) && !lock) {
			lock = 1;
			/*
			 * The lock is taken for full slabs only when the
			 * cache has debugging enabled.
			 */
			spin_lock(&n->list_lock);
		}
	}

	if (l != m) {

		/* Undo the list placement of the previous attempt ... */
		if (l == M_PARTIAL)

			remove_partial(n, page);

		else if (l == M_FULL)

			remove_full(s, page);

		/* ... and apply the placement for the new target state. */
		if (m == M_PARTIAL) {

			add_partial(n, page, tail);
			stat(s, tail);

		} else if (m == M_FULL) {

			stat(s, DEACTIVATE_FULL);
			add_full(s, n, page);

		}
	}

	l = m;
	if (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"))
		goto redo;

	if (lock)
		spin_unlock(&n->list_lock);

	/* An empty slab was not put on any list; release it now. */
	if (m == M_FREE) {
		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1871
1872
1873
1874
1875
1876
1877static void unfreeze_partials(struct kmem_cache *s)
1878{
1879 struct kmem_cache_node *n = NULL, *n2 = NULL;
1880 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1881 struct page *page, *discard_page = NULL;
1882
1883 while ((page = c->partial)) {
1884 struct page new;
1885 struct page old;
1886
1887 c->partial = page->next;
1888
1889 n2 = get_node(s, page_to_nid(page));
1890 if (n != n2) {
1891 if (n)
1892 spin_unlock(&n->list_lock);
1893
1894 n = n2;
1895 spin_lock(&n->list_lock);
1896 }
1897
1898 do {
1899
1900 old.freelist = page->freelist;
1901 old.counters = page->counters;
1902 VM_BUG_ON(!old.frozen);
1903
1904 new.counters = old.counters;
1905 new.freelist = old.freelist;
1906
1907 new.frozen = 0;
1908
1909 } while (!__cmpxchg_double_slab(s, page,
1910 old.freelist, old.counters,
1911 new.freelist, new.counters,
1912 "unfreezing slab"));
1913
1914 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1915 page->next = discard_page;
1916 discard_page = page;
1917 } else {
1918 add_partial(n, page, DEACTIVATE_TO_TAIL);
1919 stat(s, FREE_ADD_PARTIAL);
1920 }
1921 }
1922
1923 if (n)
1924 spin_unlock(&n->list_lock);
1925
1926 while (discard_page) {
1927 page = discard_page;
1928 discard_page = discard_page->next;
1929
1930 stat(s, DEACTIVATE_EMPTY);
1931 discard_slab(s, page);
1932 stat(s, FREE_SLAB);
1933 }
1934}
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1946{
1947 struct page *oldpage;
1948 int pages;
1949 int pobjects;
1950
1951 do {
1952 pages = 0;
1953 pobjects = 0;
1954 oldpage = this_cpu_read(s->cpu_slab->partial);
1955
1956 if (oldpage) {
1957 pobjects = oldpage->pobjects;
1958 pages = oldpage->pages;
1959 if (drain && pobjects > s->cpu_partial) {
1960 unsigned long flags;
1961
1962
1963
1964
1965 local_irq_save(flags);
1966 unfreeze_partials(s);
1967 local_irq_restore(flags);
1968 oldpage = NULL;
1969 pobjects = 0;
1970 pages = 0;
1971 stat(s, CPU_PARTIAL_DRAIN);
1972 }
1973 }
1974
1975 pages++;
1976 pobjects += page->objects - page->inuse;
1977
1978 page->pages = pages;
1979 page->pobjects = pobjects;
1980 page->next = oldpage;
1981
1982 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
1983 return pobjects;
1984}
1985
1986static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1987{
1988 stat(s, CPUSLAB_FLUSH);
1989 deactivate_slab(s, c->page, c->freelist);
1990
1991 c->tid = next_tid(c->tid);
1992 c->page = NULL;
1993 c->freelist = NULL;
1994}
1995
1996
1997
1998
1999
2000
2001static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2002{
2003 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2004
2005 if (likely(c)) {
2006 if (c->page)
2007 flush_slab(s, c);
2008
2009 unfreeze_partials(s);
2010 }
2011}
2012
/*
 * Cross-call handler used by flush_all(): flush the running cpu's slab
 * state for the cache passed in @d.
 */
static void flush_cpu_slab(void *d)
{
	struct kmem_cache *cache = d;
	int this_cpu = smp_processor_id();

	__flush_cpu_slab(cache, this_cpu);
}
2019
2020static bool has_cpu_slab(int cpu, void *info)
2021{
2022 struct kmem_cache *s = info;
2023 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2024
2025 return c->page || c->partial;
2026}
2027
/*
 * Flush the per-cpu slab state of @s on every cpu for which
 * has_cpu_slab() reports something to flush, waiting for completion.
 */
static void flush_all(struct kmem_cache *s)
{
	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
}
2032
2033
2034
2035
2036
/*
 * Check whether @page satisfies a request for memory from @node.
 *
 * Returns 1 when the page may be used, 0 otherwise.  Without CONFIG_NUMA
 * every page matches.
 *
 * The allocation fast path reads c->freelist and c->page without any
 * locking, so it can pass a NULL @page together with a stale, non-NULL
 * freelist.  A NULL page is reported as a mismatch, which sends the
 * caller to the slow path where the state is re-read with interrupts
 * disabled, instead of dereferencing NULL in page_to_nid().
 */
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
	if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
		return 0;
#endif
	return 1;
}
2045
2046static int count_free(struct page *page)
2047{
2048 return page->objects - page->inuse;
2049}
2050
2051static unsigned long count_partial(struct kmem_cache_node *n,
2052 int (*get_count)(struct page *))
2053{
2054 unsigned long flags;
2055 unsigned long x = 0;
2056 struct page *page;
2057
2058 spin_lock_irqsave(&n->list_lock, flags);
2059 list_for_each_entry(page, &n->partial, lru)
2060 x += get_count(page);
2061 spin_unlock_irqrestore(&n->list_lock, flags);
2062 return x;
2063}
2064
/*
 * Total number of objects accounted on node @n.  The counter only exists
 * with CONFIG_SLUB_DEBUG; without it 0 is returned.
 */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
#ifdef CONFIG_SLUB_DEBUG
	return atomic_long_read(&n->total_objects);
#else
	return 0;
#endif
}
2073
2074static noinline void
2075slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2076{
2077 int node;
2078
2079 printk(KERN_WARNING
2080 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2081 nid, gfpflags);
2082 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2083 "default order: %d, min order: %d\n", s->name, s->object_size,
2084 s->size, oo_order(s->oo), oo_order(s->min));
2085
2086 if (oo_order(s->min) > get_order(s->object_size))
2087 printk(KERN_WARNING " %s debugging increased min order, use "
2088 "slub_debug=O to disable.\n", s->name);
2089
2090 for_each_online_node(node) {
2091 struct kmem_cache_node *n = get_node(s, node);
2092 unsigned long nr_slabs;
2093 unsigned long nr_objs;
2094 unsigned long nr_free;
2095
2096 if (!n)
2097 continue;
2098
2099 nr_free = count_partial(n, count_free);
2100 nr_slabs = node_nr_slabs(n);
2101 nr_objs = node_nr_objs(n);
2102
2103 printk(KERN_WARNING
2104 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2105 node, nr_slabs, nr_objs, nr_free);
2106 }
2107}
2108
2109static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2110 int node, struct kmem_cache_cpu **pc)
2111{
2112 void *freelist;
2113 struct kmem_cache_cpu *c = *pc;
2114 struct page *page;
2115
2116 freelist = get_partial(s, flags, node, c);
2117
2118 if (freelist)
2119 return freelist;
2120
2121 page = new_slab(s, flags, node);
2122 if (page) {
2123 c = __this_cpu_ptr(s->cpu_slab);
2124 if (c->page)
2125 flush_slab(s, c);
2126
2127
2128
2129
2130
2131 freelist = page->freelist;
2132 page->freelist = NULL;
2133
2134 stat(s, ALLOC_SLAB);
2135 c->page = page;
2136 *pc = c;
2137 } else
2138 freelist = NULL;
2139
2140 return freelist;
2141}
2142
2143static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2144{
2145 if (unlikely(PageSlabPfmemalloc(page)))
2146 return gfp_pfmemalloc_allowed(gfpflags);
2147
2148 return true;
2149}
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
/*
 * Take over page->freelist of the (frozen) cpu slab.
 *
 * All objects are marked in use and page->freelist is set to NULL.  If
 * the freelist was already empty the page is unfrozen as part of the same
 * cmpxchg, since there is nothing left to allocate from it.
 *
 * Returns the old freelist, possibly NULL.
 */
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
	struct page new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = page->freelist;
		counters = page->counters;

		new.counters = counters;
		VM_BUG_ON(!new.frozen);

		new.inuse = page->objects;
		/* Stay frozen only if there were objects to take. */
		new.frozen = freelist != NULL;

	} while (!__cmpxchg_double_slab(s, page,
		freelist, counters,
		NULL, new.counters,
		"get_freelist"));

	return freelist;
}
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
/*
 * Allocation slow path, entered when the lockless fast path found no
 * usable object.  Runs with interrupts disabled from entry to return.
 *
 * In order, it tries:
 *  - the cpu slab's per-cpu freelist (after checking that the page
 *    matches the requested node and pfmemalloc constraints),
 *  - the objects on page->freelist of the cpu slab (get_freelist()),
 *  - the next slab on the per-cpu partial list,
 *  - a node partial slab or a newly allocated slab (new_slab_objects()).
 *
 * Returns the allocated object or NULL on failure.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void *freelist;
	struct page *page;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPT
	/*
	 * Re-read the per-cpu pointer now that interrupts are off;
	 * presumably the task may have changed cpus since the caller
	 * sampled @c.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	page = c->page;
	if (!page)
		goto new_slab;
redo:

	/* Wrong node: give the slab up and look for another one. */
	if (unlikely(!node_match(page, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/*
	 * The page is flagged pfmemalloc and this allocation is not allowed
	 * to use such pages: give the slab up as well.
	 */
	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/* The per-cpu freelist may have been refilled in the meantime. */
	freelist = c->freelist;
	if (freelist)
		goto load_freelist;

	stat(s, ALLOC_SLOWPATH);

	freelist = get_freelist(s, page);

	if (!freelist) {
		/* get_freelist() unfroze the exhausted page; just drop it. */
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
	}

	stat(s, ALLOC_REFILL);

load_freelist:
	/*
	 * freelist points at the object to return; the remainder of the
	 * list becomes the per-cpu freelist.  The cpu slab must be frozen.
	 */
	VM_BUG_ON(!c->page->frozen);
	c->freelist = get_freepointer(s, freelist);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return freelist;

new_slab:

	/* Promote the next per-cpu partial slab to cpu slab and retry. */
	if (c->partial) {
		page = c->page = c->partial;
		c->partial = page->next;
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	freelist = new_slab_objects(s, gfpflags, node, &c);

	if (unlikely(!freelist)) {
		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
			slab_out_of_memory(s, gfpflags, node);

		local_irq_restore(flags);
		return NULL;
	}

	page = c->page;
	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
		goto load_freelist;

	/* Debug cache: run the debug checks; on failure try another slab. */
	if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
		goto new_slab;

	/*
	 * Debug cache or pfmemalloc mismatch: hand out this single object
	 * and do not keep the page as cpu slab.
	 */
	deactivate_slab(s, page, get_freepointer(s, freelist));
	c->page = NULL;
	c->freelist = NULL;
	local_irq_restore(flags);
	return freelist;
}
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
/*
 * Allocation entry point shared by all kmem_cache_alloc variants.
 *
 * The fast path pops the first object off the per-cpu freelist with a
 * single this_cpu_cmpxchg_double() on (freelist, tid), without disabling
 * interrupts.  If the freelist is empty or the cpu slab does not match
 * @node, the slow path __slab_alloc() is used instead.
 *
 * @addr is the caller's return address, passed on to the slow path.
 * Returns the object (zeroed when __GFP_ZERO is set) or NULL.
 */
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	struct page *page;
	unsigned long tid;

	if (slab_pre_alloc_hook(s, gfpflags))
		return NULL;

redo:

	/*
	 * The per-cpu pointer is taken without pinning the task to the cpu;
	 * the tid check in the cmpxchg below catches any change.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * Sample the tid before the freelist and page; the compiler barrier
	 * keeps the reads in that order.
	 */
	tid = c->tid;
	barrier();

	object = c->freelist;
	page = c->page;
	if (unlikely(!object || !node_match(page, node)))
		object = __slab_alloc(s, gfpflags, node, addr, c);

	else {
		void *next_object = get_freepointer_safe(s, object);

		/*
		 * Install the next object as list head and advance the tid
		 * in one step.  This only succeeds if both freelist and tid
		 * are still what was sampled above; otherwise start over.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				next_object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		prefetch_freepointer(s, next_object);
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->object_size);

	slab_post_alloc_hook(s, gfpflags, object);

	return object;
}
2386
/* Allocate an object from @s with no node preference (NUMA_NO_NODE). */
static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, unsigned long addr)
{
	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
}
2392
2393void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2394{
2395 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2396
2397 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2398
2399 return ret;
2400}
2401EXPORT_SYMBOL(kmem_cache_alloc);
2402
#ifdef CONFIG_TRACING
/*
 * Like kmem_cache_alloc() but reports the allocation through the kmalloc
 * trace event, with @size as the requested size.
 */
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
	void *object;

	object = slab_alloc(s, gfpflags, _RET_IP_);
	trace_kmalloc(_RET_IP_, object, size, s->size, gfpflags);
	return object;
}
EXPORT_SYMBOL(kmem_cache_alloc_trace);

/*
 * kmalloc_order() plus a kmalloc trace event reporting the full
 * PAGE_SIZE << order bytes as the allocated size.
 */
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *mem;

	mem = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, mem, size, PAGE_SIZE << order, flags);
	return mem;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif
2420
#ifdef CONFIG_NUMA
/*
 * Allocate one object from cache @s on NUMA node @node and emit the
 * kmem_cache_alloc_node trace event.
 */
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
	void *object;

	object = slab_alloc_node(s, gfpflags, node, _RET_IP_);
	trace_kmem_cache_alloc_node(_RET_IP_, object,
				    s->object_size, s->size, gfpflags, node);
	return object;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

#ifdef CONFIG_TRACING
/*
 * Node-aware allocation reported through the kmalloc_node trace event,
 * with @size as the requested size.
 */
void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
				    gfp_t gfpflags,
				    int node, size_t size)
{
	void *object;

	object = slab_alloc_node(s, gfpflags, node, _RET_IP_);
	trace_kmalloc_node(_RET_IP_, object,
			   size, s->size, gfpflags, node);
	return object;
}
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
#endif
#endif
2447
2448
2449
2450
2451
2452
2453
2454
2455
/*
 * Free slow path: the object does not belong to the running cpu's
 * current cpu slab (or debugging is enabled).
 *
 * The object is pushed onto page->freelist with a cmpxchg.  Depending on
 * the state of the page before the free, the page may additionally be
 * frozen and moved to the per-cpu partial list, moved from the full list
 * to the node partial list, or discarded once it becomes empty.
 *
 * @x is the object, @addr the caller's return address (used by the debug
 * checks).
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	int inuse;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	/*
	 * Debug caches: run the free checks.  A non-NULL return is stored
	 * in n, which makes the code below behave as if list_lock were
	 * already held (it unlocks with the flags saved here).
	 */
	if (kmem_cache_debug(s) &&
		!(n = free_debug_processing(s, page, x, addr, &flags)))
		return;

	do {
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		/*
		 * An unfrozen page that becomes empty, or that was full
		 * (no prior freelist), needs list handling.
		 */
		if ((!new.inuse || !prior) && !was_frozen && !n) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * The page was full: freeze it so it can be
				 * put on the per-cpu partial list without
				 * taking list_lock.
				 */
				new.frozen = 1;

			else {

				n = get_node(s, page_to_nid(page));
				/*
				 * The page will go on or off a node list.
				 * Take list_lock before the cmpxchg; it
				 * stays held across retries of this loop
				 * because n is now set.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}
		inuse = new.inuse;

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * No lock was taken.  If this free froze the page, put it
		 * on the per-cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}

		/* The page was already frozen; nothing else to do with it. */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}

	/*
	 * list_lock is held from here on.  A page that was frozen is left
	 * alone; only the lock is dropped.
	 */
	if (was_frozen)
		stat(s, FREE_FROZEN);
	else {
		if (unlikely(!inuse && n->nr_partial > s->min_partial))
			goto slab_empty;

		/*
		 * The page had no free objects before: move it from the
		 * full list to the partial list.
		 */
		if (unlikely(!prior)) {
			remove_full(s, page);
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/* The page had free objects, so it was on the partial list. */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* The page was full, so it is taken off the full list. */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
/*
 * Free entry point.
 *
 * If the object belongs to the running cpu's current cpu slab it is
 * pushed onto the per-cpu freelist with a single
 * this_cpu_cmpxchg_double() on (freelist, tid), without disabling
 * interrupts.  Otherwise the slow path __slab_free() is taken.
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * The per-cpu pointer is taken without pinning the task to the cpu;
	 * the tid check in the cmpxchg below catches any change.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/* Sample the tid before reading the rest of the per-cpu state. */
	tid = c->tid;
	barrier();

	if (likely(page == c->page)) {
		set_freepointer(s, object, c->freelist);

		/* Retry from scratch if freelist or tid changed meanwhile. */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);

}
2619
2620void kmem_cache_free(struct kmem_cache *s, void *x)
2621{
2622 struct page *page;
2623
2624 page = virt_to_head_page(x);
2625
2626 if (kmem_cache_debug(s) && page->slab != s) {
2627 pr_err("kmem_cache_free: Wrong slab cache. %s but object"
2628 " is from %s\n", page->slab->name, s->name);
2629 WARN_ON_ONCE(1);
2630 return;
2631 }
2632
2633 slab_free(s, page, x, _RET_IP_);
2634
2635 trace_kmem_cache_free(_RET_IP_, x);
2636}
2637EXPORT_SYMBOL(kmem_cache_free);
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
/*
 * Tunables for slab order selection, read by slab_order() and
 * calculate_order():
 *   slub_min_order   - lowest page order considered for a slab
 *   slub_max_order   - highest order tried before falling back
 *   slub_min_objects - desired minimum objects per slab; 0 means derive
 *                      it from the number of cpus
 */
static int slub_min_order;
static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static int slub_min_objects;

/*
 * Not referenced in this part of the file; presumably a switch that
 * disables merging of similar caches.
 */
static int slub_nomerge;
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693static inline int slab_order(int size, int min_objects,
2694 int max_order, int fract_leftover, int reserved)
2695{
2696 int order;
2697 int rem;
2698 int min_order = slub_min_order;
2699
2700 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2701 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2702
2703 for (order = max(min_order,
2704 fls(min_objects * size - 1) - PAGE_SHIFT);
2705 order <= max_order; order++) {
2706
2707 unsigned long slab_size = PAGE_SIZE << order;
2708
2709 if (slab_size < min_objects * size + reserved)
2710 continue;
2711
2712 rem = (slab_size - reserved) % size;
2713
2714 if (rem <= slab_size / fract_leftover)
2715 break;
2716
2717 }
2718
2719 return order;
2720}
2721
2722static inline int calculate_order(int size, int reserved)
2723{
2724 int order;
2725 int min_objects;
2726 int fraction;
2727 int max_objects;
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737 min_objects = slub_min_objects;
2738 if (!min_objects)
2739 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2740 max_objects = order_objects(slub_max_order, size, reserved);
2741 min_objects = min(min_objects, max_objects);
2742
2743 while (min_objects > 1) {
2744 fraction = 16;
2745 while (fraction >= 4) {
2746 order = slab_order(size, min_objects,
2747 slub_max_order, fraction, reserved);
2748 if (order <= slub_max_order)
2749 return order;
2750 fraction /= 2;
2751 }
2752 min_objects--;
2753 }
2754
2755
2756
2757
2758
2759 order = slab_order(size, 1, slub_max_order, 1, reserved);
2760 if (order <= slub_max_order)
2761 return order;
2762
2763
2764
2765
2766 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2767 if (order < MAX_ORDER)
2768 return order;
2769 return -ENOSYS;
2770}
2771
2772
2773
2774
2775static unsigned long calculate_alignment(unsigned long flags,
2776 unsigned long align, unsigned long size)
2777{
2778
2779
2780
2781
2782
2783
2784
2785 if (flags & SLAB_HWCACHE_ALIGN) {
2786 unsigned long ralign = cache_line_size();
2787 while (size <= ralign / 2)
2788 ralign /= 2;
2789 align = max(align, ralign);
2790 }
2791
2792 if (align < ARCH_SLAB_MINALIGN)
2793 align = ARCH_SLAB_MINALIGN;
2794
2795 return ALIGN(align, sizeof(void *));
2796}
2797
2798static void
2799init_kmem_cache_node(struct kmem_cache_node *n)
2800{
2801 n->nr_partial = 0;
2802 spin_lock_init(&n->list_lock);
2803 INIT_LIST_HEAD(&n->partial);
2804#ifdef CONFIG_SLUB_DEBUG
2805 atomic_long_set(&n->nr_slabs, 0);
2806 atomic_long_set(&n->total_objects, 0);
2807 INIT_LIST_HEAD(&n->full);
2808#endif
2809}
2810
2811static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2812{
2813 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2814 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2815
2816
2817
2818
2819
2820 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2821 2 * sizeof(void *));
2822
2823 if (!s->cpu_slab)
2824 return 0;
2825
2826 init_kmem_cache_cpus(s);
2827
2828 return 1;
2829}
2830
/* The cache from which struct kmem_cache_node objects are allocated. */
static struct kmem_cache *kmem_cache_node;
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
/*
 * Bootstrap helper: set up the kmem_cache_node of the kmem_cache_node
 * cache itself for @node, used by init_kmem_cache_nodes() while
 * slab_state is DOWN.
 *
 * A slab is allocated directly with new_slab(), its first object is
 * carved off by hand to serve as the node structure, and the slab is then
 * placed on that node's partial list.  Failure to get a slab is fatal
 * (BUG_ON).
 */
static void early_kmem_cache_node_alloc(int node)
{
	struct page *page;
	struct kmem_cache_node *n;

	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));

	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);

	BUG_ON(!page);
	/* The page came from another node: warn, but carry on with it. */
	if (page_to_nid(page) != node) {
		printk(KERN_ERR "SLUB: Unable to allocate memory from "
				"node %d\n", node);
		printk(KERN_ERR "SLUB: Allocating a useless per node structure "
				"in order to be able to continue\n");
	}

	/* Take the first free object manually and account it as in use. */
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse = 1;
	page->frozen = 0;
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
	init_tracking(kmem_cache_node, n);
#endif
	init_kmem_cache_node(n);
	inc_slabs_node(kmem_cache_node, node, page->objects);

	add_partial(n, page, DEACTIVATE_TO_HEAD);
}
2874
2875static void free_kmem_cache_nodes(struct kmem_cache *s)
2876{
2877 int node;
2878
2879 for_each_node_state(node, N_NORMAL_MEMORY) {
2880 struct kmem_cache_node *n = s->node[node];
2881
2882 if (n)
2883 kmem_cache_free(kmem_cache_node, n);
2884
2885 s->node[node] = NULL;
2886 }
2887}
2888
2889static int init_kmem_cache_nodes(struct kmem_cache *s)
2890{
2891 int node;
2892
2893 for_each_node_state(node, N_NORMAL_MEMORY) {
2894 struct kmem_cache_node *n;
2895
2896 if (slab_state == DOWN) {
2897 early_kmem_cache_node_alloc(node);
2898 continue;
2899 }
2900 n = kmem_cache_alloc_node(kmem_cache_node,
2901 GFP_KERNEL, node);
2902
2903 if (!n) {
2904 free_kmem_cache_nodes(s);
2905 return 0;
2906 }
2907
2908 s->node[node] = n;
2909 init_kmem_cache_node(n);
2910 }
2911 return 1;
2912}
2913
2914static void set_min_partial(struct kmem_cache *s, unsigned long min)
2915{
2916 if (min < MIN_PARTIAL)
2917 min = MIN_PARTIAL;
2918 else if (min > MAX_PARTIAL)
2919 min = MAX_PARTIAL;
2920 s->min_partial = min;
2921}
2922
2923
2924
2925
2926
/*
 * Compute the object layout and slab order of cache @s from
 * s->object_size, s->flags, s->align and s->reserved.
 *
 * Sets s->inuse, s->offset (when the free pointer is placed after the
 * object), s->align, s->size, s->allocflags, s->oo, s->min and s->max.
 *
 * @forced_order: order to use, or a negative value to have
 *                calculate_order() choose one.
 *
 * Returns non-zero if at least one object fits into a slab, 0 otherwise
 * (including when no valid order could be found).
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	unsigned long flags = s->flags;
	unsigned long size = s->object_size;
	unsigned long align = s->align;
	int order;

	/* Round the object up to a pointer multiple. */
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * __OBJECT_POISON is set only when poisoning was requested and the
	 * cache is neither RCU-freed nor has a constructor; otherwise it is
	 * cleared.
	 */
	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;

	/*
	 * Red zoning with no alignment slack after the object: add one
	 * extra word so there is room behind the object.
	 */
	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
		size += sizeof(void *);
#endif

	/* Everything up to here counts as the in-use part of the object. */
	s->inuse = size;

	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * For RCU-freed, poisoned or constructed objects the free
		 * pointer is placed in a separate word after the object
		 * instead of inside it.
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	if (flags & SLAB_STORE_USER)
		/* Room for two tracking records. */
		size += 2 * sizeof(struct track);

	if (flags & SLAB_RED_ZONE)
		/* One more word of padding at the end when red zoning. */
		size += sizeof(void *);
#endif

	/* Final alignment is derived from the flags and the bare object size. */
	align = calculate_alignment(flags, align, s->object_size);
	s->align = align;

	/* Round the full per-object size up to the alignment. */
	size = ALIGN(size, align);
	s->size = size;
	if (forced_order >= 0)
		order = forced_order;
	else
		order = calculate_order(size, s->reserved);

	if (order < 0)
		return 0;

	s->allocflags = 0;
	if (order)
		s->allocflags |= __GFP_COMP;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= SLUB_DMA;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * oo: preferred order and object count; min: smallest order that
	 * holds one object; max: the largest oo seen so far.
	 */
	s->oo = oo_make(order, size, s->reserved);
	s->min = oo_make(get_order(size), size, s->reserved);
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;

	return !!oo_objects(s->oo);

}
3046
/*
 * Finish setting up cache @s: resolve the effective flags, compute the
 * object layout, pick min_partial and cpu_partial, and allocate the
 * per-node and per-cpu structures.
 *
 * Returns 0 on success and -EINVAL on failure.  If @flags contains
 * SLAB_PANIC a failure panics instead of returning.
 */
static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
	s->reserved = 0;

	/* Reserve room at the end of each slab for the rcu_head if needed. */
	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
		s->reserved = sizeof(struct rcu_head);

	if (!calculate_sizes(s, -1))
		goto error;
	if (disable_higher_order_debug) {
		/*
		 * The debug metadata pushed the object into a higher page
		 * order: drop the metadata flags and recompute the layout.
		 */
		if (get_order(s->size) > get_order(s->object_size)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}

#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
		/* Mark the cache as using cmpxchg_double; debug caches never are. */
		s->flags |= __CMPXCHG_DOUBLE;
#endif

	/* min_partial scales with log2 of the object size (then clamped). */
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * cpu_partial: upper bound on free objects kept on the per-cpu
	 * partial list (see put_cpu_partial()).  Disabled for debug caches,
	 * and smaller for larger objects.
	 */
	if (kmem_cache_debug(s))
		s->cpu_partial = 0;
	else if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;

#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif
	if (!init_kmem_cache_nodes(s))
		goto error;

	if (alloc_kmem_cache_cpus(s))
		return 0;

	/* Per-cpu allocation failed: undo the per-node allocation. */
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
			s->offset, flags);
	return -EINVAL;
}
3129
3130
3131
3132
3133unsigned int kmem_cache_size(struct kmem_cache *s)
3134{
3135 return s->object_size;
3136}
3137EXPORT_SYMBOL(kmem_cache_size);
3138
/*
 * Report a slab error for @page using @text as the format (s->name is its
 * only argument) and then print every object whose bit is clear in the map
 * filled in by get_map(), together with its tracking data.
 *
 * Compiled to an empty function without CONFIG_SLUB_DEBUG. If the bitmap
 * cannot be allocated nothing is printed at all.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	void *obj;
	unsigned long *map;

	map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long), GFP_ATOMIC);
	if (!map)
		return;

	slab_err(s, page, text, s->name);
	slab_lock(page);

	get_map(s, page, map);
	for_each_object(obj, s, base, page->objects) {
		if (test_bit(slab_index(obj, s, base), map))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
							obj, obj - base);
		print_tracking(s, obj);
	}

	slab_unlock(page);
	kfree(map);
#endif
}
3165
3166
3167
3168
3169
3170
3171static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3172{
3173 struct page *page, *h;
3174
3175 list_for_each_entry_safe(page, h, &n->partial, lru) {
3176 if (!page->inuse) {
3177 remove_partial(n, page);
3178 discard_slab(s, page);
3179 } else {
3180 list_slab_objects(s, page,
3181 "Objects remaining in %s on kmem_cache_close()");
3182 }
3183 }
3184}
3185
3186
3187
3188
3189static inline int kmem_cache_close(struct kmem_cache *s)
3190{
3191 int node;
3192
3193 flush_all(s);
3194
3195 for_each_node_state(node, N_NORMAL_MEMORY) {
3196 struct kmem_cache_node *n = get_node(s, node);
3197
3198 free_partial(s, n);
3199 if (n->nr_partial || slabs_node(s, node))
3200 return 1;
3201 }
3202 free_percpu(s->cpu_slab);
3203 free_kmem_cache_nodes(s);
3204 return 0;
3205}
3206
/*
 * Close cache @s and, only if that succeeded, remove its sysfs entry.
 * Returns the result of kmem_cache_close() (0 on success).
 */
int __kmem_cache_shutdown(struct kmem_cache *s)
{
	int busy = kmem_cache_close(s);

	if (busy)
		return busy;

	sysfs_slab_remove(s);
	return 0;
}
3216
3217
3218
3219
3220
/*
 * General purpose kmalloc caches, indexed by the value get_slab() computes:
 * a size_index[] entry for sizes up to 192 bytes, fls(size - 1) above that.
 */
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
/* Same indexing as kmalloc_caches[]; used when SLUB_DMA is requested. */
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
3227
3228static int __init setup_slub_min_order(char *str)
3229{
3230 get_option(&str, &slub_min_order);
3231
3232 return 1;
3233}
3234
3235__setup("slub_min_order=", setup_slub_min_order);
3236
3237static int __init setup_slub_max_order(char *str)
3238{
3239 get_option(&str, &slub_max_order);
3240 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3241
3242 return 1;
3243}
3244
3245__setup("slub_max_order=", setup_slub_max_order);
3246
3247static int __init setup_slub_min_objects(char *str)
3248{
3249 get_option(&str, &slub_min_objects);
3250
3251 return 1;
3252}
3253
3254__setup("slub_min_objects=", setup_slub_min_objects);
3255
/*
 * Boot parameter "slub_nomerge": sets slub_nomerge, which makes both
 * slab_unmergeable() and find_mergeable() refuse to merge caches.
 * The argument string is ignored.
 */
static int __init setup_slub_nomerge(char *str)
{
	slub_nomerge = 1;
	return 1;
}

__setup("slub_nomerge", setup_slub_nomerge);
3263
3264static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3265 int size, unsigned int flags)
3266{
3267 struct kmem_cache *s;
3268
3269 s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3270
3271 s->name = name;
3272 s->size = s->object_size = size;
3273 s->align = ARCH_KMALLOC_MINALIGN;
3274
3275
3276
3277
3278
3279 if (kmem_cache_open(s, flags))
3280 goto panic;
3281
3282 list_add(&s->list, &slab_caches);
3283 return s;
3284
3285panic:
3286 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3287 return NULL;
3288}
3289
3290
3291
3292
3293
3294
3295
/*
 * Maps small allocation sizes to an index into kmalloc_caches[]. Entry k is
 * selected by size_index_elem() for sizes 8k+1 .. 8k+8; the comment on each
 * entry gives the upper end of that range. Indices 1 and 2 are the 96 and
 * 192 byte caches, the others are the power-of-two caches of size 1 << index.
 * Not const: kmem_cache_init() patches entries depending on KMALLOC_MIN_SIZE.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};
3322
/*
 * Convert a byte count into a slot number of an 8-byte-granular table:
 * 1..8 -> 0, 9..16 -> 1, and so on.
 */
static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) >> 3;
}
3327
3328static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3329{
3330 int index;
3331
3332 if (size <= 192) {
3333 if (!size)
3334 return ZERO_SIZE_PTR;
3335
3336 index = size_index[size_index_elem(size)];
3337 } else
3338 index = fls(size - 1);
3339
3340#ifdef CONFIG_ZONE_DMA
3341 if (unlikely((flags & SLUB_DMA)))
3342 return kmalloc_dma_caches[index];
3343
3344#endif
3345 return kmalloc_caches[index];
3346}
3347
3348void *__kmalloc(size_t size, gfp_t flags)
3349{
3350 struct kmem_cache *s;
3351 void *ret;
3352
3353 if (unlikely(size > SLUB_MAX_SIZE))
3354 return kmalloc_large(size, flags);
3355
3356 s = get_slab(size, flags);
3357
3358 if (unlikely(ZERO_OR_NULL_PTR(s)))
3359 return s;
3360
3361 ret = slab_alloc(s, flags, _RET_IP_);
3362
3363 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3364
3365 return ret;
3366}
3367EXPORT_SYMBOL(__kmalloc);
3368
3369#ifdef CONFIG_NUMA
3370static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3371{
3372 struct page *page;
3373 void *ptr = NULL;
3374
3375 flags |= __GFP_COMP | __GFP_NOTRACK;
3376 page = alloc_pages_node(node, flags, get_order(size));
3377 if (page)
3378 ptr = page_address(page);
3379
3380 kmemleak_alloc(ptr, size, 1, flags);
3381 return ptr;
3382}
3383
3384void *__kmalloc_node(size_t size, gfp_t flags, int node)
3385{
3386 struct kmem_cache *s;
3387 void *ret;
3388
3389 if (unlikely(size > SLUB_MAX_SIZE)) {
3390 ret = kmalloc_large_node(size, flags, node);
3391
3392 trace_kmalloc_node(_RET_IP_, ret,
3393 size, PAGE_SIZE << get_order(size),
3394 flags, node);
3395
3396 return ret;
3397 }
3398
3399 s = get_slab(size, flags);
3400
3401 if (unlikely(ZERO_OR_NULL_PTR(s)))
3402 return s;
3403
3404 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3405
3406 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3407
3408 return ret;
3409}
3410EXPORT_SYMBOL(__kmalloc_node);
3411#endif
3412
3413size_t ksize(const void *object)
3414{
3415 struct page *page;
3416
3417 if (unlikely(object == ZERO_SIZE_PTR))
3418 return 0;
3419
3420 page = virt_to_head_page(object);
3421
3422 if (unlikely(!PageSlab(page))) {
3423 WARN_ON(!PageCompound(page));
3424 return PAGE_SIZE << compound_order(page);
3425 }
3426
3427 return slab_ksize(page->slab);
3428}
3429EXPORT_SYMBOL(ksize);
3430
3431#ifdef CONFIG_SLUB_DEBUG
3432bool verify_mem_not_deleted(const void *x)
3433{
3434 struct page *page;
3435 void *object = (void *)x;
3436 unsigned long flags;
3437 bool rv;
3438
3439 if (unlikely(ZERO_OR_NULL_PTR(x)))
3440 return false;
3441
3442 local_irq_save(flags);
3443
3444 page = virt_to_head_page(x);
3445 if (unlikely(!PageSlab(page))) {
3446
3447 rv = true;
3448 goto out_unlock;
3449 }
3450
3451 slab_lock(page);
3452 if (on_freelist(page->slab, page, object)) {
3453 object_err(page->slab, page, object, "Object is on free-list");
3454 rv = false;
3455 } else {
3456 rv = true;
3457 }
3458 slab_unlock(page);
3459
3460out_unlock:
3461 local_irq_restore(flags);
3462 return rv;
3463}
3464EXPORT_SYMBOL(verify_mem_not_deleted);
3465#endif
3466
3467void kfree(const void *x)
3468{
3469 struct page *page;
3470 void *object = (void *)x;
3471
3472 trace_kfree(_RET_IP_, x);
3473
3474 if (unlikely(ZERO_OR_NULL_PTR(x)))
3475 return;
3476
3477 page = virt_to_head_page(x);
3478 if (unlikely(!PageSlab(page))) {
3479 BUG_ON(!PageCompound(page));
3480 kmemleak_free(x);
3481 __free_pages(page, compound_order(page));
3482 return;
3483 }
3484 slab_free(page->slab, page, object, _RET_IP_);
3485}
3486EXPORT_SYMBOL(kfree);
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498int kmem_cache_shrink(struct kmem_cache *s)
3499{
3500 int node;
3501 int i;
3502 struct kmem_cache_node *n;
3503 struct page *page;
3504 struct page *t;
3505 int objects = oo_objects(s->max);
3506 struct list_head *slabs_by_inuse =
3507 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3508 unsigned long flags;
3509
3510 if (!slabs_by_inuse)
3511 return -ENOMEM;
3512
3513 flush_all(s);
3514 for_each_node_state(node, N_NORMAL_MEMORY) {
3515 n = get_node(s, node);
3516
3517 if (!n->nr_partial)
3518 continue;
3519
3520 for (i = 0; i < objects; i++)
3521 INIT_LIST_HEAD(slabs_by_inuse + i);
3522
3523 spin_lock_irqsave(&n->list_lock, flags);
3524
3525
3526
3527
3528
3529
3530
3531 list_for_each_entry_safe(page, t, &n->partial, lru) {
3532 list_move(&page->lru, slabs_by_inuse + page->inuse);
3533 if (!page->inuse)
3534 n->nr_partial--;
3535 }
3536
3537
3538
3539
3540
3541 for (i = objects - 1; i > 0; i--)
3542 list_splice(slabs_by_inuse + i, n->partial.prev);
3543
3544 spin_unlock_irqrestore(&n->list_lock, flags);
3545
3546
3547 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3548 discard_slab(s, page);
3549 }
3550
3551 kfree(slabs_by_inuse);
3552 return 0;
3553}
3554EXPORT_SYMBOL(kmem_cache_shrink);
3555
3556#if defined(CONFIG_MEMORY_HOTPLUG)
3557static int slab_mem_going_offline_callback(void *arg)
3558{
3559 struct kmem_cache *s;
3560
3561 mutex_lock(&slab_mutex);
3562 list_for_each_entry(s, &slab_caches, list)
3563 kmem_cache_shrink(s);
3564 mutex_unlock(&slab_mutex);
3565
3566 return 0;
3567}
3568
3569static void slab_mem_offline_callback(void *arg)
3570{
3571 struct kmem_cache_node *n;
3572 struct kmem_cache *s;
3573 struct memory_notify *marg = arg;
3574 int offline_node;
3575
3576 offline_node = marg->status_change_nid;
3577
3578
3579
3580
3581
3582 if (offline_node < 0)
3583 return;
3584
3585 mutex_lock(&slab_mutex);
3586 list_for_each_entry(s, &slab_caches, list) {
3587 n = get_node(s, offline_node);
3588 if (n) {
3589
3590
3591
3592
3593
3594
3595 BUG_ON(slabs_node(s, offline_node));
3596
3597 s->node[offline_node] = NULL;
3598 kmem_cache_free(kmem_cache_node, n);
3599 }
3600 }
3601 mutex_unlock(&slab_mutex);
3602}
3603
3604static int slab_mem_going_online_callback(void *arg)
3605{
3606 struct kmem_cache_node *n;
3607 struct kmem_cache *s;
3608 struct memory_notify *marg = arg;
3609 int nid = marg->status_change_nid;
3610 int ret = 0;
3611
3612
3613
3614
3615
3616 if (nid < 0)
3617 return 0;
3618
3619
3620
3621
3622
3623
3624 mutex_lock(&slab_mutex);
3625 list_for_each_entry(s, &slab_caches, list) {
3626
3627
3628
3629
3630
3631 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3632 if (!n) {
3633 ret = -ENOMEM;
3634 goto out;
3635 }
3636 init_kmem_cache_node(n);
3637 s->node[nid] = n;
3638 }
3639out:
3640 mutex_unlock(&slab_mutex);
3641 return ret;
3642}
3643
3644static int slab_memory_callback(struct notifier_block *self,
3645 unsigned long action, void *arg)
3646{
3647 int ret = 0;
3648
3649 switch (action) {
3650 case MEM_GOING_ONLINE:
3651 ret = slab_mem_going_online_callback(arg);
3652 break;
3653 case MEM_GOING_OFFLINE:
3654 ret = slab_mem_going_offline_callback(arg);
3655 break;
3656 case MEM_OFFLINE:
3657 case MEM_CANCEL_ONLINE:
3658 slab_mem_offline_callback(arg);
3659 break;
3660 case MEM_ONLINE:
3661 case MEM_CANCEL_OFFLINE:
3662 break;
3663 }
3664 if (ret)
3665 ret = notifier_from_errno(ret);
3666 else
3667 ret = NOTIFY_OK;
3668 return ret;
3669}
3670
3671#endif
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3683{
3684 int node;
3685
3686 list_add(&s->list, &slab_caches);
3687 s->refcount = -1;
3688
3689 for_each_node_state(node, N_NORMAL_MEMORY) {
3690 struct kmem_cache_node *n = get_node(s, node);
3691 struct page *p;
3692
3693 if (n) {
3694 list_for_each_entry(p, &n->partial, lru)
3695 p->slab = s;
3696
3697#ifdef CONFIG_SLUB_DEBUG
3698 list_for_each_entry(p, &n->full, lru)
3699 p->slab = s;
3700#endif
3701 }
3702 }
3703}
3704
3705void __init kmem_cache_init(void)
3706{
3707 int i;
3708 int caches = 0;
3709 struct kmem_cache *temp_kmem_cache;
3710 int order;
3711 struct kmem_cache *temp_kmem_cache_node;
3712 unsigned long kmalloc_size;
3713
3714 if (debug_guardpage_minorder())
3715 slub_max_order = 0;
3716
3717 kmem_size = offsetof(struct kmem_cache, node) +
3718 nr_node_ids * sizeof(struct kmem_cache_node *);
3719
3720
3721 kmalloc_size = ALIGN(kmem_size, cache_line_size());
3722 order = get_order(2 * kmalloc_size);
3723 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order);
3724
3725
3726
3727
3728
3729
3730 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
3731
3732 kmem_cache_node->name = "kmem_cache_node";
3733 kmem_cache_node->size = kmem_cache_node->object_size =
3734 sizeof(struct kmem_cache_node);
3735 kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
3736
3737 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3738
3739
3740 slab_state = PARTIAL;
3741
3742 temp_kmem_cache = kmem_cache;
3743 kmem_cache->name = "kmem_cache";
3744 kmem_cache->size = kmem_cache->object_size = kmem_size;
3745 kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
3746
3747 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3748 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
3749
3750
3751
3752
3753
3754
3755 temp_kmem_cache_node = kmem_cache_node;
3756
3757 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3758 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3759
3760 kmem_cache_bootstrap_fixup(kmem_cache_node);
3761
3762 caches++;
3763 kmem_cache_bootstrap_fixup(kmem_cache);
3764 caches++;
3765
3766 free_pages((unsigned long)temp_kmem_cache, order);
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3782 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3783
3784 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3785 int elem = size_index_elem(i);
3786 if (elem >= ARRAY_SIZE(size_index))
3787 break;
3788 size_index[elem] = KMALLOC_SHIFT_LOW;
3789 }
3790
3791 if (KMALLOC_MIN_SIZE == 64) {
3792
3793
3794
3795
3796 for (i = 64 + 8; i <= 96; i += 8)
3797 size_index[size_index_elem(i)] = 7;
3798 } else if (KMALLOC_MIN_SIZE == 128) {
3799
3800
3801
3802
3803
3804 for (i = 128 + 8; i <= 192; i += 8)
3805 size_index[size_index_elem(i)] = 8;
3806 }
3807
3808
3809 if (KMALLOC_MIN_SIZE <= 32) {
3810 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3811 caches++;
3812 }
3813
3814 if (KMALLOC_MIN_SIZE <= 64) {
3815 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3816 caches++;
3817 }
3818
3819 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3820 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3821 caches++;
3822 }
3823
3824 slab_state = UP;
3825
3826
3827 if (KMALLOC_MIN_SIZE <= 32) {
3828 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3829 BUG_ON(!kmalloc_caches[1]->name);
3830 }
3831
3832 if (KMALLOC_MIN_SIZE <= 64) {
3833 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3834 BUG_ON(!kmalloc_caches[2]->name);
3835 }
3836
3837 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3838 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3839
3840 BUG_ON(!s);
3841 kmalloc_caches[i]->name = s;
3842 }
3843
3844#ifdef CONFIG_SMP
3845 register_cpu_notifier(&slab_notifier);
3846#endif
3847
3848#ifdef CONFIG_ZONE_DMA
3849 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3850 struct kmem_cache *s = kmalloc_caches[i];
3851
3852 if (s && s->size) {
3853 char *name = kasprintf(GFP_NOWAIT,
3854 "dma-kmalloc-%d", s->object_size);
3855
3856 BUG_ON(!name);
3857 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3858 s->object_size, SLAB_CACHE_DMA);
3859 }
3860 }
3861#endif
3862 printk(KERN_INFO
3863 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3864 " CPUs=%d, Nodes=%d\n",
3865 caches, cache_line_size(),
3866 slub_min_order, slub_max_order, slub_min_objects,
3867 nr_cpu_ids, nr_node_ids);
3868}
3869
/* Late initialisation hook; this allocator has nothing to do here. */
void __init kmem_cache_init_late(void)
{
}
3873
3874
3875
3876
3877static int slab_unmergeable(struct kmem_cache *s)
3878{
3879 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3880 return 1;
3881
3882 if (s->ctor)
3883 return 1;
3884
3885
3886
3887
3888 if (s->refcount < 0)
3889 return 1;
3890
3891 return 0;
3892}
3893
3894static struct kmem_cache *find_mergeable(size_t size,
3895 size_t align, unsigned long flags, const char *name,
3896 void (*ctor)(void *))
3897{
3898 struct kmem_cache *s;
3899
3900 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3901 return NULL;
3902
3903 if (ctor)
3904 return NULL;
3905
3906 size = ALIGN(size, sizeof(void *));
3907 align = calculate_alignment(flags, align, size);
3908 size = ALIGN(size, align);
3909 flags = kmem_cache_flags(size, flags, name, NULL);
3910
3911 list_for_each_entry(s, &slab_caches, list) {
3912 if (slab_unmergeable(s))
3913 continue;
3914
3915 if (size > s->size)
3916 continue;
3917
3918 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3919 continue;
3920
3921
3922
3923
3924 if ((s->size & ~(align - 1)) != s->size)
3925 continue;
3926
3927 if (s->size - size >= sizeof(void *))
3928 continue;
3929
3930 return s;
3931 }
3932 return NULL;
3933}
3934
3935struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
3936 size_t align, unsigned long flags, void (*ctor)(void *))
3937{
3938 struct kmem_cache *s;
3939
3940 s = find_mergeable(size, align, flags, name, ctor);
3941 if (s) {
3942 s->refcount++;
3943
3944
3945
3946
3947 s->object_size = max(s->object_size, (int)size);
3948 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3949
3950 if (sysfs_slab_alias(s, name)) {
3951 s->refcount--;
3952 s = NULL;
3953 }
3954 }
3955
3956 return s;
3957}
3958
3959int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
3960{
3961 int err;
3962
3963 err = kmem_cache_open(s, flags);
3964 if (err)
3965 return err;
3966
3967 mutex_unlock(&slab_mutex);
3968 err = sysfs_slab_add(s);
3969 mutex_lock(&slab_mutex);
3970
3971 if (err)
3972 kmem_cache_close(s);
3973
3974 return err;
3975}
3976
3977#ifdef CONFIG_SMP
3978
3979
3980
3981
3982static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3983 unsigned long action, void *hcpu)
3984{
3985 long cpu = (long)hcpu;
3986 struct kmem_cache *s;
3987 unsigned long flags;
3988
3989 switch (action) {
3990 case CPU_UP_CANCELED:
3991 case CPU_UP_CANCELED_FROZEN:
3992 case CPU_DEAD:
3993 case CPU_DEAD_FROZEN:
3994 mutex_lock(&slab_mutex);
3995 list_for_each_entry(s, &slab_caches, list) {
3996 local_irq_save(flags);
3997 __flush_cpu_slab(s, cpu);
3998 local_irq_restore(flags);
3999 }
4000 mutex_unlock(&slab_mutex);
4001 break;
4002 default:
4003 break;
4004 }
4005 return NOTIFY_OK;
4006}
4007
4008static struct notifier_block __cpuinitdata slab_notifier = {
4009 .notifier_call = slab_cpuup_callback
4010};
4011
4012#endif
4013
4014void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4015{
4016 struct kmem_cache *s;
4017 void *ret;
4018
4019 if (unlikely(size > SLUB_MAX_SIZE))
4020 return kmalloc_large(size, gfpflags);
4021
4022 s = get_slab(size, gfpflags);
4023
4024 if (unlikely(ZERO_OR_NULL_PTR(s)))
4025 return s;
4026
4027 ret = slab_alloc(s, gfpflags, caller);
4028
4029
4030 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4031
4032 return ret;
4033}
4034
4035#ifdef CONFIG_NUMA
4036void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4037 int node, unsigned long caller)
4038{
4039 struct kmem_cache *s;
4040 void *ret;
4041
4042 if (unlikely(size > SLUB_MAX_SIZE)) {
4043 ret = kmalloc_large_node(size, gfpflags, node);
4044
4045 trace_kmalloc_node(caller, ret,
4046 size, PAGE_SIZE << get_order(size),
4047 gfpflags, node);
4048
4049 return ret;
4050 }
4051
4052 s = get_slab(size, gfpflags);
4053
4054 if (unlikely(ZERO_OR_NULL_PTR(s)))
4055 return s;
4056
4057 ret = slab_alloc_node(s, gfpflags, node, caller);
4058
4059
4060 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4061
4062 return ret;
4063}
4064#endif
4065
4066#ifdef CONFIG_SYSFS
4067static int count_inuse(struct page *page)
4068{
4069 return page->inuse;
4070}
4071
4072static int count_total(struct page *page)
4073{
4074 return page->objects;
4075}
4076#endif
4077
4078#ifdef CONFIG_SLUB_DEBUG
4079static int validate_slab(struct kmem_cache *s, struct page *page,
4080 unsigned long *map)
4081{
4082 void *p;
4083 void *addr = page_address(page);
4084
4085 if (!check_slab(s, page) ||
4086 !on_freelist(s, page, NULL))
4087 return 0;
4088
4089
4090 bitmap_zero(map, page->objects);
4091
4092 get_map(s, page, map);
4093 for_each_object(p, s, addr, page->objects) {
4094 if (test_bit(slab_index(p, s, addr), map))
4095 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4096 return 0;
4097 }
4098
4099 for_each_object(p, s, addr, page->objects)
4100 if (!test_bit(slab_index(p, s, addr), map))
4101 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4102 return 0;
4103 return 1;
4104}
4105
/*
 * Run validate_slab() on @page while holding the slab lock.
 * The validation result is not propagated to the caller.
 */
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	slab_lock(page);
	validate_slab(s, page, map);
	slab_unlock(page);
}
4113
4114static int validate_slab_node(struct kmem_cache *s,
4115 struct kmem_cache_node *n, unsigned long *map)
4116{
4117 unsigned long count = 0;
4118 struct page *page;
4119 unsigned long flags;
4120
4121 spin_lock_irqsave(&n->list_lock, flags);
4122
4123 list_for_each_entry(page, &n->partial, lru) {
4124 validate_slab_slab(s, page, map);
4125 count++;
4126 }
4127 if (count != n->nr_partial)
4128 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4129 "counter=%ld\n", s->name, count, n->nr_partial);
4130
4131 if (!(s->flags & SLAB_STORE_USER))
4132 goto out;
4133
4134 list_for_each_entry(page, &n->full, lru) {
4135 validate_slab_slab(s, page, map);
4136 count++;
4137 }
4138 if (count != atomic_long_read(&n->nr_slabs))
4139 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4140 "counter=%ld\n", s->name, count,
4141 atomic_long_read(&n->nr_slabs));
4142
4143out:
4144 spin_unlock_irqrestore(&n->list_lock, flags);
4145 return count;
4146}
4147
4148static long validate_slab_cache(struct kmem_cache *s)
4149{
4150 int node;
4151 unsigned long count = 0;
4152 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4153 sizeof(unsigned long), GFP_KERNEL);
4154
4155 if (!map)
4156 return -ENOMEM;
4157
4158 flush_all(s);
4159 for_each_node_state(node, N_NORMAL_MEMORY) {
4160 struct kmem_cache_node *n = get_node(s, node);
4161
4162 count += validate_slab_node(s, n, map);
4163 }
4164 kfree(map);
4165 return count;
4166}
4167
4168
4169
4170
4171
/*
 * One aggregated call site as collected by add_location(): all tracked
 * objects sharing the same track->addr are folded into a single entry.
 */
struct location {
	unsigned long count;		/* number of tracks folded into this entry */
	unsigned long addr;		/* track->addr shared by those tracks */
	long long sum_time;		/* sum of ages (jiffies - track->when) */
	long min_time;			/* smallest age seen */
	long max_time;			/* largest age seen */
	long min_pid;			/* smallest track->pid seen */
	long max_pid;			/* largest track->pid seen */
	DECLARE_BITMAP(cpus, NR_CPUS);	/* track->cpu values seen */
	nodemask_t nodes;		/* nodes of the pages holding the tracks */
};
4183
/* Growable array of struct location, kept sorted by addr by add_location(). */
struct loc_track {
	unsigned long max;	/* capacity of loc[] in entries; 0 if unallocated */
	unsigned long count;	/* entries currently in use */
	struct location *loc;	/* storage obtained from __get_free_pages() */
};
4189
4190static void free_loc_track(struct loc_track *t)
4191{
4192 if (t->max)
4193 free_pages((unsigned long)t->loc,
4194 get_order(sizeof(struct location) * t->max));
4195}
4196
4197static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4198{
4199 struct location *l;
4200 int order;
4201
4202 order = get_order(sizeof(struct location) * max);
4203
4204 l = (void *)__get_free_pages(flags, order);
4205 if (!l)
4206 return 0;
4207
4208 if (t->count) {
4209 memcpy(l, t->loc, sizeof(struct location) * t->count);
4210 free_loc_track(t);
4211 }
4212 t->max = max;
4213 t->loc = l;
4214 return 1;
4215}
4216
4217static int add_location(struct loc_track *t, struct kmem_cache *s,
4218 const struct track *track)
4219{
4220 long start, end, pos;
4221 struct location *l;
4222 unsigned long caddr;
4223 unsigned long age = jiffies - track->when;
4224
4225 start = -1;
4226 end = t->count;
4227
4228 for ( ; ; ) {
4229 pos = start + (end - start + 1) / 2;
4230
4231
4232
4233
4234
4235 if (pos == end)
4236 break;
4237
4238 caddr = t->loc[pos].addr;
4239 if (track->addr == caddr) {
4240
4241 l = &t->loc[pos];
4242 l->count++;
4243 if (track->when) {
4244 l->sum_time += age;
4245 if (age < l->min_time)
4246 l->min_time = age;
4247 if (age > l->max_time)
4248 l->max_time = age;
4249
4250 if (track->pid < l->min_pid)
4251 l->min_pid = track->pid;
4252 if (track->pid > l->max_pid)
4253 l->max_pid = track->pid;
4254
4255 cpumask_set_cpu(track->cpu,
4256 to_cpumask(l->cpus));
4257 }
4258 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4259 return 1;
4260 }
4261
4262 if (track->addr < caddr)
4263 end = pos;
4264 else
4265 start = pos;
4266 }
4267
4268
4269
4270
4271 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4272 return 0;
4273
4274 l = t->loc + pos;
4275 if (pos < t->count)
4276 memmove(l + 1, l,
4277 (t->count - pos) * sizeof(struct location));
4278 t->count++;
4279 l->count = 1;
4280 l->addr = track->addr;
4281 l->sum_time = age;
4282 l->min_time = age;
4283 l->max_time = age;
4284 l->min_pid = track->pid;
4285 l->max_pid = track->pid;
4286 cpumask_clear(to_cpumask(l->cpus));
4287 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4288 nodes_clear(l->nodes);
4289 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4290 return 1;
4291}
4292
4293static void process_slab(struct loc_track *t, struct kmem_cache *s,
4294 struct page *page, enum track_item alloc,
4295 unsigned long *map)
4296{
4297 void *addr = page_address(page);
4298 void *p;
4299
4300 bitmap_zero(map, page->objects);
4301 get_map(s, page, map);
4302
4303 for_each_object(p, s, addr, page->objects)
4304 if (!test_bit(slab_index(p, s, addr), map))
4305 add_location(t, s, get_track(s, p, alloc));
4306}
4307
4308static int list_locations(struct kmem_cache *s, char *buf,
4309 enum track_item alloc)
4310{
4311 int len = 0;
4312 unsigned long i;
4313 struct loc_track t = { 0, 0, NULL };
4314 int node;
4315 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4316 sizeof(unsigned long), GFP_KERNEL);
4317
4318 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4319 GFP_TEMPORARY)) {
4320 kfree(map);
4321 return sprintf(buf, "Out of memory\n");
4322 }
4323
4324 flush_all(s);
4325
4326 for_each_node_state(node, N_NORMAL_MEMORY) {
4327 struct kmem_cache_node *n = get_node(s, node);
4328 unsigned long flags;
4329 struct page *page;
4330
4331 if (!atomic_long_read(&n->nr_slabs))
4332 continue;
4333
4334 spin_lock_irqsave(&n->list_lock, flags);
4335 list_for_each_entry(page, &n->partial, lru)
4336 process_slab(&t, s, page, alloc, map);
4337 list_for_each_entry(page, &n->full, lru)
4338 process_slab(&t, s, page, alloc, map);
4339 spin_unlock_irqrestore(&n->list_lock, flags);
4340 }
4341
4342 for (i = 0; i < t.count; i++) {
4343 struct location *l = &t.loc[i];
4344
4345 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4346 break;
4347 len += sprintf(buf + len, "%7ld ", l->count);
4348
4349 if (l->addr)
4350 len += sprintf(buf + len, "%pS", (void *)l->addr);
4351 else
4352 len += sprintf(buf + len, "<not-available>");
4353
4354 if (l->sum_time != l->min_time) {
4355 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4356 l->min_time,
4357 (long)div_u64(l->sum_time, l->count),
4358 l->max_time);
4359 } else
4360 len += sprintf(buf + len, " age=%ld",
4361 l->min_time);
4362
4363 if (l->min_pid != l->max_pid)
4364 len += sprintf(buf + len, " pid=%ld-%ld",
4365 l->min_pid, l->max_pid);
4366 else
4367 len += sprintf(buf + len, " pid=%ld",
4368 l->min_pid);
4369
4370 if (num_online_cpus() > 1 &&
4371 !cpumask_empty(to_cpumask(l->cpus)) &&
4372 len < PAGE_SIZE - 60) {
4373 len += sprintf(buf + len, " cpus=");
4374 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4375 to_cpumask(l->cpus));
4376 }
4377
4378 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4379 len < PAGE_SIZE - 60) {
4380 len += sprintf(buf + len, " nodes=");
4381 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4382 l->nodes);
4383 }
4384
4385 len += sprintf(buf + len, "\n");
4386 }
4387
4388 free_loc_track(&t);
4389 kfree(map);
4390 if (!t.count)
4391 len += sprintf(buf, "No data\n");
4392 return len;
4393}
4394#endif
4395
4396#ifdef SLUB_RESILIENCY_TEST
/*
 * Self test that is only built when SLUB_RESILIENCY_TEST is defined.
 *
 * Each step DELIBERATELY corrupts memory - by writing past the end of a
 * freshly allocated object or by writing into an object after kfree() - and
 * then runs validate_slab_cache() on the kmalloc cache the object came from.
 * The order of the statements is therefore essential.
 */
static void resiliency_test(void)
{
	u8 *p;

	/* The cache indices used below rely on these size limits. */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	/* A1: write one byte directly behind a 16 byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches[4]);

	/* A2: write one pointer size beyond the end of a 32 byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches[5]);
	/* A3: write at a cycle-counter dependent offset past a 64 byte object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches[6]);

	printk(KERN_ERR "\nB. Corruption after free\n");
	/* B1: write the first byte of a freed 128 byte object. */
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[7]);

	/* B2: write into the middle of a freed 256 byte object. */
	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
	validate_slab_cache(kmalloc_caches[8]);

	/* B3: write directly behind a freed 512 byte object. */
	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[9]);
}
4452#else
4453#ifdef CONFIG_SYSFS
/* No-op stand-in used when SLUB_RESILIENCY_TEST is not defined. */
static void resiliency_test(void) {}
4455#endif
4456#endif
4457
4458#ifdef CONFIG_SYSFS
/*
 * Selector bits understood by show_slab_objects(). The enum provides the
 * bit positions, the SO_* macros the corresponding masks.
 */
enum slab_stat_type {
	SL_ALL,			/* all slabs, via the per-node counters */
	SL_PARTIAL,		/* slabs on the per-node partial lists */
	SL_CPU,			/* slabs currently held by cpus */
	SL_OBJECTS,		/* count objects in use rather than slabs */
	SL_TOTAL,		/* count object slots rather than slabs */
};

#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
4472
4473static ssize_t show_slab_objects(struct kmem_cache *s,
4474 char *buf, unsigned long flags)
4475{
4476 unsigned long total = 0;
4477 int node;
4478 int x;
4479 unsigned long *nodes;
4480 unsigned long *per_cpu;
4481
4482 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4483 if (!nodes)
4484 return -ENOMEM;
4485 per_cpu = nodes + nr_node_ids;
4486
4487 if (flags & SO_CPU) {
4488 int cpu;
4489
4490 for_each_possible_cpu(cpu) {
4491 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4492 int node;
4493 struct page *page;
4494
4495 page = ACCESS_ONCE(c->page);
4496 if (!page)
4497 continue;
4498
4499 node = page_to_nid(page);
4500 if (flags & SO_TOTAL)
4501 x = page->objects;
4502 else if (flags & SO_OBJECTS)
4503 x = page->inuse;
4504 else
4505 x = 1;
4506
4507 total += x;
4508 nodes[node] += x;
4509
4510 page = ACCESS_ONCE(c->partial);
4511 if (page) {
4512 x = page->pobjects;
4513 total += x;
4514 nodes[node] += x;
4515 }
4516
4517 per_cpu[node]++;
4518 }
4519 }
4520
4521 lock_memory_hotplug();
4522#ifdef CONFIG_SLUB_DEBUG
4523 if (flags & SO_ALL) {
4524 for_each_node_state(node, N_NORMAL_MEMORY) {
4525 struct kmem_cache_node *n = get_node(s, node);
4526
4527 if (flags & SO_TOTAL)
4528 x = atomic_long_read(&n->total_objects);
4529 else if (flags & SO_OBJECTS)
4530 x = atomic_long_read(&n->total_objects) -
4531 count_partial(n, count_free);
4532
4533 else
4534 x = atomic_long_read(&n->nr_slabs);
4535 total += x;
4536 nodes[node] += x;
4537 }
4538
4539 } else
4540#endif
4541 if (flags & SO_PARTIAL) {
4542 for_each_node_state(node, N_NORMAL_MEMORY) {
4543 struct kmem_cache_node *n = get_node(s, node);
4544
4545 if (flags & SO_TOTAL)
4546 x = count_partial(n, count_total);
4547 else if (flags & SO_OBJECTS)
4548 x = count_partial(n, count_inuse);
4549 else
4550 x = n->nr_partial;
4551 total += x;
4552 nodes[node] += x;
4553 }
4554 }
4555 x = sprintf(buf, "%lu", total);
4556#ifdef CONFIG_NUMA
4557 for_each_node_state(node, N_NORMAL_MEMORY)
4558 if (nodes[node])
4559 x += sprintf(buf + x, " N%d=%lu",
4560 node, nodes[node]);
4561#endif
4562 unlock_memory_hotplug();
4563 kfree(nodes);
4564 return x + sprintf(buf + x, "\n");
4565}
4566
4567#ifdef CONFIG_SLUB_DEBUG
4568static int any_slab_objects(struct kmem_cache *s)
4569{
4570 int node;
4571
4572 for_each_online_node(node) {
4573 struct kmem_cache_node *n = get_node(s, node);
4574
4575 if (!n)
4576 continue;
4577
4578 if (atomic_long_read(&n->total_objects))
4579 return 1;
4580 }
4581 return 0;
4582}
4583#endif
4584
/*
 * Recover the enclosing object from a pointer to its embedded member:
 * to_slab_attr() maps a struct attribute to its slab_attribute, and
 * to_slab() maps a kobject to its kmem_cache.
 */
#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4587
/*
 * A sysfs attribute of a slab cache: the generic attribute plus callbacks
 * that take the kmem_cache directly.  Either callback may be NULL.
 */
struct slab_attribute {
	struct attribute attr;
	/* Format the value into buf; returns bytes written or a negative errno. */
	ssize_t (*show)(struct kmem_cache *s, char *buf);
	/* Parse count bytes from x; returns bytes consumed or a negative errno. */
	ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};
4593
/* Define <_name>_attr with mode 0400, using <_name>_show and no store handler. */
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0400, _name##_show, NULL)
4597
/* Define <_name>_attr with mode 0600, using <_name>_show and <_name>_store. */
#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr =  \
	__ATTR(_name, 0600, _name##_show, _name##_store)
4601
/* Print s->size as a decimal line; returns the number of bytes written. */
static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->size);
}
SLAB_ATTR_RO(slab_size);
4607
/* Print s->align as a decimal line; returns the number of bytes written. */
static ssize_t align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->align);
}
SLAB_ATTR_RO(align);
4613
/* Print s->object_size as a decimal line; returns the number of bytes written. */
static ssize_t object_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->object_size);
}
SLAB_ATTR_RO(object_size);
4619
/* Print the object count encoded in s->oo (via oo_objects()) as a decimal line. */
static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", oo_objects(s->oo));
}
SLAB_ATTR_RO(objs_per_slab);
4625
4626static ssize_t order_store(struct kmem_cache *s,
4627 const char *buf, size_t length)
4628{
4629 unsigned long order;
4630 int err;
4631
4632 err = strict_strtoul(buf, 10, &order);
4633 if (err)
4634 return err;
4635
4636 if (order > slub_max_order || order < slub_min_order)
4637 return -EINVAL;
4638
4639 calculate_sizes(s, order);
4640 return length;
4641}
4642
/* Print the order encoded in s->oo (via oo_order()) as a decimal line. */
static ssize_t order_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", oo_order(s->oo));
}
SLAB_ATTR(order);
4648
/* Print s->min_partial as an unsigned decimal line. */
static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%lu\n", s->min_partial);
}
4653
4654static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4655 size_t length)
4656{
4657 unsigned long min;
4658 int err;
4659
4660 err = strict_strtoul(buf, 10, &min);
4661 if (err)
4662 return err;
4663
4664 set_min_partial(s, min);
4665 return length;
4666}
4667SLAB_ATTR(min_partial);
4668
/* Print s->cpu_partial as an unsigned decimal line. */
static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%u\n", s->cpu_partial);
}
4673
4674static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4675 size_t length)
4676{
4677 unsigned long objects;
4678 int err;
4679
4680 err = strict_strtoul(buf, 10, &objects);
4681 if (err)
4682 return err;
4683 if (objects && kmem_cache_debug(s))
4684 return -EINVAL;
4685
4686 s->cpu_partial = objects;
4687 flush_all(s);
4688 return length;
4689}
4690SLAB_ATTR(cpu_partial);
4691
4692static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4693{
4694 if (!s->ctor)
4695 return 0;
4696 return sprintf(buf, "%pS\n", s->ctor);
4697}
4698SLAB_ATTR_RO(ctor);
4699
/* Print s->refcount minus one as a decimal line. */
static ssize_t aliases_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->refcount - 1);
}
SLAB_ATTR_RO(aliases);
4705
4706static ssize_t partial_show(struct kmem_cache *s, char *buf)
4707{
4708 return show_slab_objects(s, buf, SO_PARTIAL);
4709}
4710SLAB_ATTR_RO(partial);
4711
4712static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4713{
4714 return show_slab_objects(s, buf, SO_CPU);
4715}
4716SLAB_ATTR_RO(cpu_slabs);
4717
4718static ssize_t objects_show(struct kmem_cache *s, char *buf)
4719{
4720 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4721}
4722SLAB_ATTR_RO(objects);
4723
4724static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4725{
4726 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4727}
4728SLAB_ATTR_RO(objects_partial);
4729
4730static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4731{
4732 int objects = 0;
4733 int pages = 0;
4734 int cpu;
4735 int len;
4736
4737 for_each_online_cpu(cpu) {
4738 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4739
4740 if (page) {
4741 pages += page->pages;
4742 objects += page->pobjects;
4743 }
4744 }
4745
4746 len = sprintf(buf, "%d(%d)", objects, pages);
4747
4748#ifdef CONFIG_SMP
4749 for_each_online_cpu(cpu) {
4750 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4751
4752 if (page && len < PAGE_SIZE - 20)
4753 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4754 page->pobjects, page->pages);
4755 }
4756#endif
4757 return len + sprintf(buf + len, "\n");
4758}
4759SLAB_ATTR_RO(slabs_cpu_partial);
4760
4761static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4762{
4763 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4764}
4765
4766static ssize_t reclaim_account_store(struct kmem_cache *s,
4767 const char *buf, size_t length)
4768{
4769 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4770 if (buf[0] == '1')
4771 s->flags |= SLAB_RECLAIM_ACCOUNT;
4772 return length;
4773}
4774SLAB_ATTR(reclaim_account);
4775
4776static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4777{
4778 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4779}
4780SLAB_ATTR_RO(hwcache_align);
4781
4782#ifdef CONFIG_ZONE_DMA
4783static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4784{
4785 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4786}
4787SLAB_ATTR_RO(cache_dma);
4788#endif
4789
4790static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4791{
4792 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4793}
4794SLAB_ATTR_RO(destroy_by_rcu);
4795
/* Print s->reserved as a decimal line; returns the number of bytes written. */
static ssize_t reserved_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->reserved);
}
SLAB_ATTR_RO(reserved);
4801
4802#ifdef CONFIG_SLUB_DEBUG
4803static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4804{
4805 return show_slab_objects(s, buf, SO_ALL);
4806}
4807SLAB_ATTR_RO(slabs);
4808
4809static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4810{
4811 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4812}
4813SLAB_ATTR_RO(total_objects);
4814
4815static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4816{
4817 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4818}
4819
4820static ssize_t sanity_checks_store(struct kmem_cache *s,
4821 const char *buf, size_t length)
4822{
4823 s->flags &= ~SLAB_DEBUG_FREE;
4824 if (buf[0] == '1') {
4825 s->flags &= ~__CMPXCHG_DOUBLE;
4826 s->flags |= SLAB_DEBUG_FREE;
4827 }
4828 return length;
4829}
4830SLAB_ATTR(sanity_checks);
4831
4832static ssize_t trace_show(struct kmem_cache *s, char *buf)
4833{
4834 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4835}
4836
4837static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4838 size_t length)
4839{
4840 s->flags &= ~SLAB_TRACE;
4841 if (buf[0] == '1') {
4842 s->flags &= ~__CMPXCHG_DOUBLE;
4843 s->flags |= SLAB_TRACE;
4844 }
4845 return length;
4846}
4847SLAB_ATTR(trace);
4848
4849static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4850{
4851 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4852}
4853
4854static ssize_t red_zone_store(struct kmem_cache *s,
4855 const char *buf, size_t length)
4856{
4857 if (any_slab_objects(s))
4858 return -EBUSY;
4859
4860 s->flags &= ~SLAB_RED_ZONE;
4861 if (buf[0] == '1') {
4862 s->flags &= ~__CMPXCHG_DOUBLE;
4863 s->flags |= SLAB_RED_ZONE;
4864 }
4865 calculate_sizes(s, -1);
4866 return length;
4867}
4868SLAB_ATTR(red_zone);
4869
4870static ssize_t poison_show(struct kmem_cache *s, char *buf)
4871{
4872 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4873}
4874
4875static ssize_t poison_store(struct kmem_cache *s,
4876 const char *buf, size_t length)
4877{
4878 if (any_slab_objects(s))
4879 return -EBUSY;
4880
4881 s->flags &= ~SLAB_POISON;
4882 if (buf[0] == '1') {
4883 s->flags &= ~__CMPXCHG_DOUBLE;
4884 s->flags |= SLAB_POISON;
4885 }
4886 calculate_sizes(s, -1);
4887 return length;
4888}
4889SLAB_ATTR(poison);
4890
4891static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4892{
4893 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4894}
4895
4896static ssize_t store_user_store(struct kmem_cache *s,
4897 const char *buf, size_t length)
4898{
4899 if (any_slab_objects(s))
4900 return -EBUSY;
4901
4902 s->flags &= ~SLAB_STORE_USER;
4903 if (buf[0] == '1') {
4904 s->flags &= ~__CMPXCHG_DOUBLE;
4905 s->flags |= SLAB_STORE_USER;
4906 }
4907 calculate_sizes(s, -1);
4908 return length;
4909}
4910SLAB_ATTR(store_user);
4911
/* Reading this attribute yields no data; it exists so SLAB_ATTR() has a show handler. */
static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
	return 0;
}
4916
4917static ssize_t validate_store(struct kmem_cache *s,
4918 const char *buf, size_t length)
4919{
4920 int ret = -EINVAL;
4921
4922 if (buf[0] == '1') {
4923 ret = validate_slab_cache(s);
4924 if (ret >= 0)
4925 ret = length;
4926 }
4927 return ret;
4928}
4929SLAB_ATTR(validate);
4930
4931static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4932{
4933 if (!(s->flags & SLAB_STORE_USER))
4934 return -ENOSYS;
4935 return list_locations(s, buf, TRACK_ALLOC);
4936}
4937SLAB_ATTR_RO(alloc_calls);
4938
4939static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4940{
4941 if (!(s->flags & SLAB_STORE_USER))
4942 return -ENOSYS;
4943 return list_locations(s, buf, TRACK_FREE);
4944}
4945SLAB_ATTR_RO(free_calls);
4946#endif
4947
4948#ifdef CONFIG_FAILSLAB
4949static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4950{
4951 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4952}
4953
4954static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4955 size_t length)
4956{
4957 s->flags &= ~SLAB_FAILSLAB;
4958 if (buf[0] == '1')
4959 s->flags |= SLAB_FAILSLAB;
4960 return length;
4961}
4962SLAB_ATTR(failslab);
4963#endif
4964
/* Reading this attribute yields no data; it exists so SLAB_ATTR() has a show handler. */
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}
4969
4970static ssize_t shrink_store(struct kmem_cache *s,
4971 const char *buf, size_t length)
4972{
4973 if (buf[0] == '1') {
4974 int rc = kmem_cache_shrink(s);
4975
4976 if (rc)
4977 return rc;
4978 } else
4979 return -EINVAL;
4980 return length;
4981}
4982SLAB_ATTR(shrink);
4983
4984#ifdef CONFIG_NUMA
/*
 * Print s->remote_node_defrag_ratio divided by 10; the store handler keeps
 * the value multiplied by 10.
 */
static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
}
4989
4990static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4991 const char *buf, size_t length)
4992{
4993 unsigned long ratio;
4994 int err;
4995
4996 err = strict_strtoul(buf, 10, &ratio);
4997 if (err)
4998 return err;
4999
5000 if (ratio <= 100)
5001 s->remote_node_defrag_ratio = ratio * 10;
5002
5003 return length;
5004}
5005SLAB_ATTR(remote_node_defrag_ratio);
5006#endif
5007
5008#ifdef CONFIG_SLUB_STATS
5009static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5010{
5011 unsigned long sum = 0;
5012 int cpu;
5013 int len;
5014 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
5015
5016 if (!data)
5017 return -ENOMEM;
5018
5019 for_each_online_cpu(cpu) {
5020 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5021
5022 data[cpu] = x;
5023 sum += x;
5024 }
5025
5026 len = sprintf(buf, "%lu", sum);
5027
5028#ifdef CONFIG_SMP
5029 for_each_online_cpu(cpu) {
5030 if (data[cpu] && len < PAGE_SIZE - 20)
5031 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5032 }
5033#endif
5034 kfree(data);
5035 return len + sprintf(buf + len, "\n");
5036}
5037
5038static void clear_stat(struct kmem_cache *s, enum stat_item si)
5039{
5040 int cpu;
5041
5042 for_each_online_cpu(cpu)
5043 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5044}
5045
/*
 * Generate the <text>_show / <text>_store pair and the <text>_attr attribute
 * for statistics counter @si.  Reading prints the counter via show_stat();
 * writing a value starting with '0' clears it, anything else is -EINVAL.
 */
#define STAT_ATTR(si, text) 					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);						\
5060
/* One attribute per statistics counter; each must also be listed in slab_attrs[]. */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5087#endif
5088
/*
 * NULL-terminated table of every attribute installed in a cache's sysfs
 * directory through slab_attr_group.  Entries inside #ifdef sections exist
 * only when the matching config option is enabled.
 */
static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif

	NULL
};
5161
/* Attribute group that sysfs_slab_add() creates on each cache kobject. */
static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};
5165
5166static ssize_t slab_attr_show(struct kobject *kobj,
5167 struct attribute *attr,
5168 char *buf)
5169{
5170 struct slab_attribute *attribute;
5171 struct kmem_cache *s;
5172 int err;
5173
5174 attribute = to_slab_attr(attr);
5175 s = to_slab(kobj);
5176
5177 if (!attribute->show)
5178 return -EIO;
5179
5180 err = attribute->show(s, buf);
5181
5182 return err;
5183}
5184
5185static ssize_t slab_attr_store(struct kobject *kobj,
5186 struct attribute *attr,
5187 const char *buf, size_t len)
5188{
5189 struct slab_attribute *attribute;
5190 struct kmem_cache *s;
5191 int err;
5192
5193 attribute = to_slab_attr(attr);
5194 s = to_slab(kobj);
5195
5196 if (!attribute->store)
5197 return -EIO;
5198
5199 err = attribute->store(s, buf, len);
5200
5201 return err;
5202}
5203
/* sysfs operations for cache kobjects; both dispatch to the slab_attribute callbacks. */
static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};
5208
/* kobject type used for every cache registered by sysfs_slab_add(). */
static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
};
5212
5213static int uevent_filter(struct kset *kset, struct kobject *kobj)
5214{
5215 struct kobj_type *ktype = get_ktype(kobj);
5216
5217 if (ktype == &slab_ktype)
5218 return 1;
5219 return 0;
5220}
5221
/* uevent operations passed to kset_create_and_add() for the "slab" kset. */
static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};
5225
/* The "slab" kset created in slab_sysfs_init(); parent of all cache kobjects and alias links. */
static struct kset *slab_kset;
5227
/* Size in bytes of the buffer create_unique_id() allocates for a cache id string. */
#define ID_STR_LENGTH 64
5229
5230
5231
5232
5233
5234static char *create_unique_id(struct kmem_cache *s)
5235{
5236 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5237 char *p = name;
5238
5239 BUG_ON(!name);
5240
5241 *p++ = ':';
5242
5243
5244
5245
5246
5247
5248
5249 if (s->flags & SLAB_CACHE_DMA)
5250 *p++ = 'd';
5251 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5252 *p++ = 'a';
5253 if (s->flags & SLAB_DEBUG_FREE)
5254 *p++ = 'F';
5255 if (!(s->flags & SLAB_NOTRACK))
5256 *p++ = 't';
5257 if (p != name + 1)
5258 *p++ = '-';
5259 p += sprintf(p, "%07d", s->size);
5260 BUG_ON(p > name + ID_STR_LENGTH - 1);
5261 return name;
5262}
5263
5264static int sysfs_slab_add(struct kmem_cache *s)
5265{
5266 int err;
5267 const char *name;
5268 int unmergeable;
5269
5270 if (slab_state < FULL)
5271
5272 return 0;
5273
5274 unmergeable = slab_unmergeable(s);
5275 if (unmergeable) {
5276
5277
5278
5279
5280
5281 sysfs_remove_link(&slab_kset->kobj, s->name);
5282 name = s->name;
5283 } else {
5284
5285
5286
5287
5288 name = create_unique_id(s);
5289 }
5290
5291 s->kobj.kset = slab_kset;
5292 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5293 if (err) {
5294 kobject_put(&s->kobj);
5295 return err;
5296 }
5297
5298 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5299 if (err) {
5300 kobject_del(&s->kobj);
5301 kobject_put(&s->kobj);
5302 return err;
5303 }
5304 kobject_uevent(&s->kobj, KOBJ_ADD);
5305 if (!unmergeable) {
5306
5307 sysfs_slab_alias(s, s->name);
5308 kfree(name);
5309 }
5310 return 0;
5311}
5312
5313static void sysfs_slab_remove(struct kmem_cache *s)
5314{
5315 if (slab_state < FULL)
5316
5317
5318
5319
5320 return;
5321
5322 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5323 kobject_del(&s->kobj);
5324 kobject_put(&s->kobj);
5325}
5326
5327
5328
5329
5330
/*
 * An alias requested before slab_state reached FULL.  Entries are queued on
 * alias_list by sysfs_slab_alias() and replayed by slab_sysfs_init().
 */
struct saved_alias {
	struct kmem_cache *s;		/* cache the alias points to */
	const char *name;		/* alias name; the pointer is stored, not copied */
	struct saved_alias *next;	/* next queued entry, NULL at the end */
};
5336
/* Head of the singly linked list of deferred aliases; new entries are pushed at the front. */
static struct saved_alias *alias_list;
5338
5339static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5340{
5341 struct saved_alias *al;
5342
5343 if (slab_state == FULL) {
5344
5345
5346
5347 sysfs_remove_link(&slab_kset->kobj, name);
5348 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5349 }
5350
5351 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5352 if (!al)
5353 return -ENOMEM;
5354
5355 al->s = s;
5356 al->name = name;
5357 al->next = alias_list;
5358 alias_list = al;
5359 return 0;
5360}
5361
5362static int __init slab_sysfs_init(void)
5363{
5364 struct kmem_cache *s;
5365 int err;
5366
5367 mutex_lock(&slab_mutex);
5368
5369 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5370 if (!slab_kset) {
5371 mutex_unlock(&slab_mutex);
5372 printk(KERN_ERR "Cannot register slab subsystem.\n");
5373 return -ENOSYS;
5374 }
5375
5376 slab_state = FULL;
5377
5378 list_for_each_entry(s, &slab_caches, list) {
5379 err = sysfs_slab_add(s);
5380 if (err)
5381 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5382 " to sysfs\n", s->name);
5383 }
5384
5385 while (alias_list) {
5386 struct saved_alias *al = alias_list;
5387
5388 alias_list = alias_list->next;
5389 err = sysfs_slab_alias(al->s, al->name);
5390 if (err)
5391 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5392 " %s to sysfs\n", al->name);
5393 kfree(al);
5394 }
5395
5396 mutex_unlock(&slab_mutex);
5397 resiliency_test();
5398 return 0;
5399}
5400
5401__initcall(slab_sysfs_init);
5402#endif
5403
5404
5405
5406
5407#ifdef CONFIG_SLABINFO
5408static void print_slabinfo_header(struct seq_file *m)
5409{
5410 seq_puts(m, "slabinfo - version: 2.1\n");
5411 seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
5412 "<objperslab> <pagesperslab>");
5413 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
5414 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
5415 seq_putc(m, '\n');
5416}
5417
5418static void *s_start(struct seq_file *m, loff_t *pos)
5419{
5420 loff_t n = *pos;
5421
5422 mutex_lock(&slab_mutex);
5423 if (!n)
5424 print_slabinfo_header(m);
5425
5426 return seq_list_start(&slab_caches, *pos);
5427}
5428
/* seq_file next: advance to the following entry of slab_caches. */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}
5433
/* seq_file stop: release slab_mutex taken in s_start(). */
static void s_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}
5438
5439static int s_show(struct seq_file *m, void *p)
5440{
5441 unsigned long nr_partials = 0;
5442 unsigned long nr_slabs = 0;
5443 unsigned long nr_inuse = 0;
5444 unsigned long nr_objs = 0;
5445 unsigned long nr_free = 0;
5446 struct kmem_cache *s;
5447 int node;
5448
5449 s = list_entry(p, struct kmem_cache, list);
5450
5451 for_each_online_node(node) {
5452 struct kmem_cache_node *n = get_node(s, node);
5453
5454 if (!n)
5455 continue;
5456
5457 nr_partials += n->nr_partial;
5458 nr_slabs += atomic_long_read(&n->nr_slabs);
5459 nr_objs += atomic_long_read(&n->total_objects);
5460 nr_free += count_partial(n, count_free);
5461 }
5462
5463 nr_inuse = nr_objs - nr_free;
5464
5465 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
5466 nr_objs, s->size, oo_objects(s->oo),
5467 (1 << oo_order(s->oo)));
5468 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
5469 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
5470 0UL);
5471 seq_putc(m, '\n');
5472 return 0;
5473}
5474
/* seq_file iterator over slab_caches used by slabinfo_open(). */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
5481
/* open handler: attach the slabinfo_op iterator to the file via seq_open(). */
static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}
5486
/* File operations of the slabinfo proc entry; everything but open is the generic seq_file helper. */
static const struct file_operations proc_slabinfo_operations = {
	.open		= slabinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
5493
/*
 * Create the "slabinfo" proc entry with mode S_IRUSR.  The result of
 * proc_create() is not checked; the function always returns 0.
 */
static int __init slab_proc_init(void)
{
	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
5500#endif
5501