1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include "slab.h"
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/kmemcheck.h>
23#include <linux/cpu.h>
24#include <linux/cpuset.h>
25#include <linux/mempolicy.h>
26#include <linux/ctype.h>
27#include <linux/debugobjects.h>
28#include <linux/kallsyms.h>
29#include <linux/memory.h>
30#include <linux/math64.h>
31#include <linux/fault-inject.h>
32#include <linux/stacktrace.h>
33#include <linux/prefetch.h>
34#include <linux/memcontrol.h>
35
36#include <trace/events/kmem.h>
37
38#include "internal.h"
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
 * Tell whether cache @s has any of the SLAB_DEBUG_FLAGS bits set.
 *
 * Evaluates to a constant 0 when CONFIG_SLUB_DEBUG is not configured.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#endif
}
124
125
126
127
128
129
130
131
132
133
/*
 * SLUB_RESILIENCY_TEST is left undefined here.
 * NOTE(review): its users are not visible in this part of the file.
 */
#undef SLUB_RESILIENCY_TEST

/*
 * SLUB_DEBUG_CMPXCHG is left undefined here. When defined, the
 * cmpxchg_double_slab() helpers print a "cmpxchg double redo" line every
 * time an update attempt fails.
 */
#undef SLUB_DEBUG_CMPXCHG

/*
 * NOTE(review): presumably the lower bound on partial slabs kept per node;
 * the users are not visible in this part of the file - confirm.
 */
#define MIN_PARTIAL 5

/*
 * NOTE(review): presumably the upper bound on partial slabs kept per node;
 * the users are not visible in this part of the file - confirm.
 */
#define MAX_PARTIAL 10

/*
 * Debug options that get switched on when slub_debug is enabled without an
 * explicit option list (see setup_slub_debug()).
 */
#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * NOTE(review): judging by the name, the debug options that add per-object
 * metadata; the users are not visible in this part of the file - confirm.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * NOTE(review): judging by the names, flag sets consulted when deciding
 * whether two caches may be merged; the users are not visible here - confirm.
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

/*
 * Layout of struct kmem_cache_order_objects: oo_make() stores the page
 * order above bit OO_SHIFT and the object count in the OO_MASK bits below.
 */
#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
/* on_freelist() clamps the expected per-slab object count to this value. */
#define MAX_OBJS_PER_PAGE	32767

/* Internal bits tested against kmem_cache->flags in this file. */
#define __OBJECT_POISON		0x80000000UL /* init_object() poisons the object body */
#define __CMPXCHG_DOUBLE	0x40000000UL /* cmpxchg_double_slab() uses cmpxchg_double */
179
#ifdef CONFIG_SMP
/*
 * Notifier block declared for SMP builds only. It is neither initialised
 * nor registered in this part of the file.
 */
static struct notifier_block slab_notifier;
#endif
183
184
185
186
/* Number of stack-trace entries stored per tracking record. */
#define TRACK_ADDRS_COUNT 16
/*
 * One alloc or free record kept alongside an object when SLAB_STORE_USER is
 * set. Filled in by set_track(), printed by print_track().
 */
struct track {
	unsigned long addr;	/* Address passed to set_track(); 0 means "no record" */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Saved stack trace, zero padded */
#endif
	int cpu;		/* smp_processor_id() at record time */
	int pid;		/* current->pid at record time */
	unsigned long when;	/* jiffies at record time */
};

/* Index of the record inside an object's pair of struct track (see get_track()). */
enum track_item { TRACK_ALLOC, TRACK_FREE };
199
/*
 * sysfs hooks. With CONFIG_SYSFS they are only declared here (definitions
 * are not in this part of the file); without it they collapse into no-op
 * stubs, with the int-returning ones reporting 0.
 */
#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }

static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
#endif
213
/*
 * Increment statistics counter @si in the per-cpu structure of cache @s.
 * Expands to nothing unless CONFIG_SLUB_STATS is enabled.
 */
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);
#endif
}
220
221
222
223
224
225static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
226{
227 return s->node[node];
228}
229
230
231static inline int check_valid_pointer(struct kmem_cache *s,
232 struct page *page, const void *object)
233{
234 void *base;
235
236 if (!object)
237 return 1;
238
239 base = page_address(page);
240 if (object < base || object >= base + page->objects * s->size ||
241 (object - base) % s->size) {
242 return 0;
243 }
244
245 return 1;
246}
247
248static inline void *get_freepointer(struct kmem_cache *s, void *object)
249{
250 return *(void **)(object + s->offset);
251}
252
253static void prefetch_freepointer(const struct kmem_cache *s, void *object)
254{
255 prefetch(object + s->offset);
256}
257
/*
 * Like get_freepointer(), but with CONFIG_DEBUG_PAGEALLOC the read goes
 * through probe_kernel_read() instead of a direct dereference.
 *
 * The result of probe_kernel_read() is not checked, so the returned value
 * is only meaningful if the read succeeded.
 */
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	void *fp;

	probe_kernel_read(&fp, (void **)(object + s->offset), sizeof(fp));
	return fp;
#else
	return get_freepointer(s, object);
#endif
}
269
270static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
271{
272 *(void **)(object + s->offset) = fp;
273}
274
275
/*
 * Iterate @__p over the @__objects objects of cache @__s that start at
 * @__addr, advancing by (__s)->size each step.
 *
 * @__addr and @__objects are evaluated on every iteration.
 */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)
279
280
281static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
282{
283 return (p - addr) / s->size;
284}
285
286static inline size_t slab_ksize(const struct kmem_cache *s)
287{
288#ifdef CONFIG_SLUB_DEBUG
289
290
291
292
293 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
294 return s->object_size;
295
296#endif
297
298
299
300
301
302 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
303 return s->inuse;
304
305
306
307 return s->size;
308}
309
310static inline int order_objects(int order, unsigned long size, int reserved)
311{
312 return ((PAGE_SIZE << order) - reserved) / size;
313}
314
315static inline struct kmem_cache_order_objects oo_make(int order,
316 unsigned long size, int reserved)
317{
318 struct kmem_cache_order_objects x = {
319 (order << OO_SHIFT) + order_objects(order, size, reserved)
320 };
321
322 return x;
323}
324
325static inline int oo_order(struct kmem_cache_order_objects x)
326{
327 return x.x >> OO_SHIFT;
328}
329
330static inline int oo_objects(struct kmem_cache_order_objects x)
331{
332 return x.x & OO_MASK;
333}
334
335
336
337
/* Take the per-slab lock: a bit spinlock on PG_locked in page->flags. */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}
342
/* Release the per-slab lock taken by slab_lock(). */
static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}
347
348
/*
 * Replace page->freelist and page->counters with the new values, provided
 * both still hold the expected old values.
 *
 * Interrupts must already be disabled (asserted by the VM_BUG_ON below).
 * If CONFIG_HAVE_CMPXCHG_DOUBLE and CONFIG_HAVE_ALIGNED_STRUCT_PAGE are set
 * and the cache carries __CMPXCHG_DOUBLE, the update uses cmpxchg_double();
 * otherwise the pair is compared and written under slab_lock().
 *
 * @n is a caller-supplied tag that only shows up in the optional
 * SLUB_DEBUG_CMPXCHG diagnostic.
 *
 * Returns 1 if the update was applied. Otherwise CMPXCHG_DOUBLE_FAIL is
 * counted and 0 is returned.
 */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
	VM_BUG_ON(!irqs_disabled());
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist, &page->counters,
			freelist_old, counters_old,
			freelist_new, counters_new))
		return 1;
	} else
#endif
	{
		/* Fallback: emulate the double-word update under the slab lock. */
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			return 1;
		}
		slab_unlock(page);
	}

	/* The old values did not match. */
	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return 0;
}
384
/*
 * Same contract as __cmpxchg_double_slab(), except that this variant does
 * not require the caller to have interrupts disabled: the slab_lock()
 * fallback path saves and disables local interrupts itself and restores
 * them before returning.
 *
 * Returns 1 if page->freelist/page->counters were updated. Otherwise
 * CMPXCHG_DOUBLE_FAIL is counted and 0 is returned.
 */
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist, &page->counters,
			freelist_old, counters_old,
			freelist_new, counters_new))
		return 1;
	} else
#endif
	{
		unsigned long flags;

		/* Fallback: emulate the update with irqs off and the slab locked. */
		local_irq_save(flags);
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			local_irq_restore(flags);
			return 1;
		}
		slab_unlock(page);
		local_irq_restore(flags);
	}

	/* The old values did not match. */
	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return 0;
}
424
425#ifdef CONFIG_SLUB_DEBUG
426
427
428
429
430
431
432static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
433{
434 void *p;
435 void *addr = page_address(page);
436
437 for (p = page->freelist; p; p = get_freepointer(s, p))
438 set_bit(slab_index(p, s, addr), map);
439}
440
441
442
443
/*
 * Debug flags that kmem_cache_flags() adds to caches. They start out as
 * DEBUG_DEFAULT_FLAGS when CONFIG_SLUB_DEBUG_ON is set and as 0 otherwise,
 * and can be overridden by the "slub_debug" boot option.
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

/* Cache-name prefix from "slub_debug=...,<name>"; NULL applies the flags to all caches. */
static char *slub_debug_slabs;
/* Set by the 'o' option of "slub_debug="; its consumers are not in this part of the file. */
static int disable_higher_order_debug;
452
453
454
455
456static void print_section(char *text, u8 *addr, unsigned int length)
457{
458 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
459 length, 1);
460}
461
462static struct track *get_track(struct kmem_cache *s, void *object,
463 enum track_item alloc)
464{
465 struct track *p;
466
467 if (s->offset)
468 p = object + s->offset + sizeof(void *);
469 else
470 p = object + s->inuse;
471
472 return p + alloc;
473}
474
475static void set_track(struct kmem_cache *s, void *object,
476 enum track_item alloc, unsigned long addr)
477{
478 struct track *p = get_track(s, object, alloc);
479
480 if (addr) {
481#ifdef CONFIG_STACKTRACE
482 struct stack_trace trace;
483 int i;
484
485 trace.nr_entries = 0;
486 trace.max_entries = TRACK_ADDRS_COUNT;
487 trace.entries = p->addrs;
488 trace.skip = 3;
489 save_stack_trace(&trace);
490
491
492 if (trace.nr_entries != 0 &&
493 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
494 trace.nr_entries--;
495
496 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
497 p->addrs[i] = 0;
498#endif
499 p->addr = addr;
500 p->cpu = smp_processor_id();
501 p->pid = current->pid;
502 p->when = jiffies;
503 } else
504 memset(p, 0, sizeof(struct track));
505}
506
507static void init_tracking(struct kmem_cache *s, void *object)
508{
509 if (!(s->flags & SLAB_STORE_USER))
510 return;
511
512 set_track(s, object, TRACK_FREE, 0UL);
513 set_track(s, object, TRACK_ALLOC, 0UL);
514}
515
516static void print_track(const char *s, struct track *t)
517{
518 if (!t->addr)
519 return;
520
521 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
522 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
523#ifdef CONFIG_STACKTRACE
524 {
525 int i;
526 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
527 if (t->addrs[i])
528 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
529 else
530 break;
531 }
532#endif
533}
534
535static void print_tracking(struct kmem_cache *s, void *object)
536{
537 if (!(s->flags & SLAB_STORE_USER))
538 return;
539
540 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
541 print_track("Freed", get_track(s, object, TRACK_FREE));
542}
543
/*
 * Log the slab-level state of @page: object count, objects in use, free
 * list head and page flags.
 */
static void print_page_info(struct page *page)
{
	printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
		page, page->objects, page->inuse, page->freelist, page->flags);

}
550
551static void slab_bug(struct kmem_cache *s, char *fmt, ...)
552{
553 va_list args;
554 char buf[100];
555
556 va_start(args, fmt);
557 vsnprintf(buf, sizeof(buf), fmt, args);
558 va_end(args);
559 printk(KERN_ERR "========================================"
560 "=====================================\n");
561 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
562 printk(KERN_ERR "----------------------------------------"
563 "-------------------------------------\n\n");
564
565 add_taint(TAINT_BAD_PAGE);
566}
567
568static void slab_fix(struct kmem_cache *s, char *fmt, ...)
569{
570 va_list args;
571 char buf[100];
572
573 va_start(args, fmt);
574 vsnprintf(buf, sizeof(buf), fmt, args);
575 va_end(args);
576 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
577}
578
579static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
580{
581 unsigned int off;
582 u8 *addr = page_address(page);
583
584 print_tracking(s, p);
585
586 print_page_info(page);
587
588 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
589 p, p - addr, get_freepointer(s, p));
590
591 if (p > addr + 16)
592 print_section("Bytes b4 ", p - 16, 16);
593
594 print_section("Object ", p, min_t(unsigned long, s->object_size,
595 PAGE_SIZE));
596 if (s->flags & SLAB_RED_ZONE)
597 print_section("Redzone ", p + s->object_size,
598 s->inuse - s->object_size);
599
600 if (s->offset)
601 off = s->offset + sizeof(void *);
602 else
603 off = s->inuse;
604
605 if (s->flags & SLAB_STORE_USER)
606 off += 2 * sizeof(struct track);
607
608 if (off != s->size)
609
610 print_section("Padding ", p + off, s->size - off);
611
612 dump_stack();
613}
614
/*
 * Report a problem with a single object: print the bug banner with @reason
 * followed by the full object dump from print_trailer().
 */
static void object_err(struct kmem_cache *s, struct page *page,
			u8 *object, char *reason)
{
	slab_bug(s, "%s", reason);
	print_trailer(s, page, object);
}
621
622static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
623{
624 va_list args;
625 char buf[100];
626
627 va_start(args, fmt);
628 vsnprintf(buf, sizeof(buf), fmt, args);
629 va_end(args);
630 slab_bug(s, "%s", buf);
631 print_page_info(page);
632 dump_stack();
633}
634
635static void init_object(struct kmem_cache *s, void *object, u8 val)
636{
637 u8 *p = object;
638
639 if (s->flags & __OBJECT_POISON) {
640 memset(p, POISON_FREE, s->object_size - 1);
641 p[s->object_size - 1] = POISON_END;
642 }
643
644 if (s->flags & SLAB_RED_ZONE)
645 memset(p + s->object_size, val, s->inuse - s->object_size);
646}
647
648static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
649 void *from, void *to)
650{
651 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
652 memset(from, data, to - from);
653}
654
655static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
656 u8 *object, char *what,
657 u8 *start, unsigned int value, unsigned int bytes)
658{
659 u8 *fault;
660 u8 *end;
661
662 fault = memchr_inv(start, value, bytes);
663 if (!fault)
664 return 1;
665
666 end = start + bytes;
667 while (end > fault && end[-1] == value)
668 end--;
669
670 slab_bug(s, "%s overwritten", what);
671 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
672 fault, end - 1, fault[0], value);
673 print_trailer(s, page, object);
674
675 restore_bytes(s, what, value, fault, end);
676 return 0;
677}
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
718{
719 unsigned long off = s->inuse;
720
721 if (s->offset)
722
723 off += sizeof(void *);
724
725 if (s->flags & SLAB_STORE_USER)
726
727 off += 2 * sizeof(struct track);
728
729 if (s->size == off)
730 return 1;
731
732 return check_bytes_and_report(s, page, p, "Object padding",
733 p + off, POISON_INUSE, s->size - off);
734}
735
736
737static int slab_pad_check(struct kmem_cache *s, struct page *page)
738{
739 u8 *start;
740 u8 *fault;
741 u8 *end;
742 int length;
743 int remainder;
744
745 if (!(s->flags & SLAB_POISON))
746 return 1;
747
748 start = page_address(page);
749 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
750 end = start + length;
751 remainder = length % s->size;
752 if (!remainder)
753 return 1;
754
755 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
756 if (!fault)
757 return 1;
758 while (end > fault && end[-1] == POISON_INUSE)
759 end--;
760
761 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
762 print_section("Padding ", end - remainder, remainder);
763
764 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
765 return 0;
766}
767
/*
 * Run the debug consistency checks on one object of slab @page.
 *
 * @val is the red zone value the object is expected to carry
 * (SLUB_RED_ACTIVE or SLUB_RED_INACTIVE at the call sites in this file).
 *
 * Returns 1 if the object passed, 0 if the red zone, the poison pattern or
 * the free pointer was found corrupted. Damaged alignment or object padding
 * is reported and repaired but does not fail the check.
 */
static int check_object(struct kmem_cache *s, struct page *page,
					void *object, u8 val)
{
	u8 *p = object;
	u8 *endobject = object + s->object_size;

	if (s->flags & SLAB_RED_ZONE) {
		/* Bytes between object_size and inuse must all equal @val. */
		if (!check_bytes_and_report(s, page, object, "Redzone",
			endobject, val, s->inuse - s->object_size))
			return 0;
	} else {
		/* No red zone: that gap is padding and holds POISON_INUSE. */
		if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
			check_bytes_and_report(s, page, p, "Alignment padding",
				endobject, POISON_INUSE, s->inuse - s->object_size);
		}
	}

	if (s->flags & SLAB_POISON) {
		/*
		 * The body is only expected to hold the free pattern when the
		 * object is not marked active: POISON_FREE throughout with
		 * POISON_END in the last byte.
		 */
		if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
			(!check_bytes_and_report(s, page, p, "Poison", p,
					POISON_FREE, s->object_size - 1) ||
			 !check_bytes_and_report(s, page, p, "Poison",
				p + s->object_size - 1, POISON_END, 1)))
			return 0;
		/*
		 * The return value is deliberately ignored: damaged padding
		 * has been reported and repaired by now.
		 */
		check_pad_bytes(s, page, p);
	}

	if (!s->offset && val == SLUB_RED_ACTIVE)
		/*
		 * With s->offset == 0 the free pointer shares its storage with
		 * the start of the object, so it is not checked while the
		 * object is marked active.
		 */
		return 1;

	/* The free pointer must be NULL or point at an object of this slab. */
	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
		object_err(s, page, p, "Freepointer corrupt");
		/*
		 * Cut the chain here; whatever followed this object on the
		 * free list is no longer reachable through it.
		 */
		set_freepointer(s, p, NULL);
		return 0;
	}
	return 1;
}
818
819static int check_slab(struct kmem_cache *s, struct page *page)
820{
821 int maxobj;
822
823 VM_BUG_ON(!irqs_disabled());
824
825 if (!PageSlab(page)) {
826 slab_err(s, page, "Not a valid slab page");
827 return 0;
828 }
829
830 maxobj = order_objects(compound_order(page), s->size, s->reserved);
831 if (page->objects > maxobj) {
832 slab_err(s, page, "objects %u > max %u",
833 s->name, page->objects, maxobj);
834 return 0;
835 }
836 if (page->inuse > page->objects) {
837 slab_err(s, page, "inuse %u > max %u",
838 s->name, page->inuse, page->objects);
839 return 0;
840 }
841
842 slab_pad_check(s, page);
843 return 1;
844}
845
846
847
848
849
850static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
851{
852 int nr = 0;
853 void *fp;
854 void *object = NULL;
855 unsigned long max_objects;
856
857 fp = page->freelist;
858 while (fp && nr <= page->objects) {
859 if (fp == search)
860 return 1;
861 if (!check_valid_pointer(s, page, fp)) {
862 if (object) {
863 object_err(s, page, object,
864 "Freechain corrupt");
865 set_freepointer(s, object, NULL);
866 break;
867 } else {
868 slab_err(s, page, "Freepointer corrupt");
869 page->freelist = NULL;
870 page->inuse = page->objects;
871 slab_fix(s, "Freelist cleared");
872 return 0;
873 }
874 break;
875 }
876 object = fp;
877 fp = get_freepointer(s, object);
878 nr++;
879 }
880
881 max_objects = order_objects(compound_order(page), s->size, s->reserved);
882 if (max_objects > MAX_OBJS_PER_PAGE)
883 max_objects = MAX_OBJS_PER_PAGE;
884
885 if (page->objects != max_objects) {
886 slab_err(s, page, "Wrong number of objects. Found %d but "
887 "should be %d", page->objects, max_objects);
888 page->objects = max_objects;
889 slab_fix(s, "Number of objects adjusted.");
890 }
891 if (page->inuse != page->objects - nr) {
892 slab_err(s, page, "Wrong object count. Counter is %d but "
893 "counted were %d", page->inuse, page->objects - nr);
894 page->inuse = page->objects - nr;
895 slab_fix(s, "Object count adjusted.");
896 }
897 return search == NULL;
898}
899
900static void trace(struct kmem_cache *s, struct page *page, void *object,
901 int alloc)
902{
903 if (s->flags & SLAB_TRACE) {
904 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
905 s->name,
906 alloc ? "alloc" : "free",
907 object, page->inuse,
908 page->freelist);
909
910 if (!alloc)
911 print_section("Object ", (void *)object, s->object_size);
912
913 dump_stack();
914 }
915}
916
917
918
919
920
921static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
922{
923 flags &= gfp_allowed_mask;
924 lockdep_trace_alloc(flags);
925 might_sleep_if(flags & __GFP_WAIT);
926
927 return should_failslab(s->object_size, flags, s->flags);
928}
929
930static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
931{
932 flags &= gfp_allowed_mask;
933 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
934 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
935}
936
/*
 * Hook run when object @x of cache @s is freed: notifies kmemleak and,
 * depending on the configuration, kmemcheck, lockdep and debugobjects.
 */
static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
	kmemleak_free_recursive(x, s->flags);

	/*
	 * The kmemcheck and lock checks below run with local interrupts
	 * disabled; the block is compiled in only when one of the two
	 * facilities is configured.
	 */
#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
	{
		unsigned long flags;

		local_irq_save(flags);
		kmemcheck_slab_free(s, x, s->object_size);
		debug_check_no_locks_freed(x, s->object_size);
		local_irq_restore(flags);
	}
#endif
	/* Caches flagged SLAB_DEBUG_OBJECTS are exempt from this check. */
	if (!(s->flags & SLAB_DEBUG_OBJECTS))
		debug_check_no_obj_freed(x, s->object_size);
}
959
960
961
962
963
964
965static void add_full(struct kmem_cache *s,
966 struct kmem_cache_node *n, struct page *page)
967{
968 if (!(s->flags & SLAB_STORE_USER))
969 return;
970
971 list_add(&page->lru, &n->full);
972}
973
974
975
976
977static void remove_full(struct kmem_cache *s, struct page *page)
978{
979 if (!(s->flags & SLAB_STORE_USER))
980 return;
981
982 list_del(&page->lru);
983}
984
985
986static inline unsigned long slabs_node(struct kmem_cache *s, int node)
987{
988 struct kmem_cache_node *n = get_node(s, node);
989
990 return atomic_long_read(&n->nr_slabs);
991}
992
993static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
994{
995 return atomic_long_read(&n->nr_slabs);
996}
997
998static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
999{
1000 struct kmem_cache_node *n = get_node(s, node);
1001
1002
1003
1004
1005
1006
1007
1008 if (n) {
1009 atomic_long_inc(&n->nr_slabs);
1010 atomic_long_add(objects, &n->total_objects);
1011 }
1012}
1013static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1014{
1015 struct kmem_cache_node *n = get_node(s, node);
1016
1017 atomic_long_dec(&n->nr_slabs);
1018 atomic_long_sub(objects, &n->total_objects);
1019}
1020
1021
1022static void setup_object_debug(struct kmem_cache *s, struct page *page,
1023 void *object)
1024{
1025 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1026 return;
1027
1028 init_object(s, object, SLUB_RED_INACTIVE);
1029 init_tracking(s, object);
1030}
1031
1032static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1033 void *object, unsigned long addr)
1034{
1035 if (!check_slab(s, page))
1036 goto bad;
1037
1038 if (!check_valid_pointer(s, page, object)) {
1039 object_err(s, page, object, "Freelist Pointer check fails");
1040 goto bad;
1041 }
1042
1043 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1044 goto bad;
1045
1046
1047 if (s->flags & SLAB_STORE_USER)
1048 set_track(s, object, TRACK_ALLOC, addr);
1049 trace(s, page, object, 1);
1050 init_object(s, object, SLUB_RED_ACTIVE);
1051 return 1;
1052
1053bad:
1054 if (PageSlab(page)) {
1055
1056
1057
1058
1059
1060 slab_fix(s, "Marking all objects used");
1061 page->inuse = page->objects;
1062 page->freelist = NULL;
1063 }
1064 return 0;
1065}
1066
/*
 * Debug checks performed when @object of slab @page is freed.
 *
 * Takes n->list_lock (saving the irq state in *@flags) and the slab lock,
 * then verifies the slab, the object pointer, that the object is not
 * already on the free list, the object's debug patterns, and that the page
 * really belongs to cache @s.
 *
 * Returns the page's kmem_cache_node with n->list_lock STILL HELD when the
 * free may go ahead; the caller has to release it using *@flags. Returns
 * NULL, with all locks released, when the object must not be freed.
 */
static noinline struct kmem_cache_node *free_debug_processing(
	struct kmem_cache *s, struct page *page, void *object,
	unsigned long addr, unsigned long *flags)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	spin_lock_irqsave(&n->list_lock, *flags);
	slab_lock(page);

	if (!check_slab(s, page))
		goto fail;

	if (!check_valid_pointer(s, page, object)) {
		slab_err(s, page, "Invalid object pointer 0x%p", object);
		goto fail;
	}

	if (on_freelist(s, page, object)) {
		object_err(s, page, object, "Object already free");
		goto fail;
	}

	/*
	 * A failed object check takes the "out" path: the node is still
	 * returned, only the tracking/trace/re-init steps are skipped.
	 */
	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
		goto out;

	if (unlikely(s != page->slab_cache)) {
		if (!PageSlab(page)) {
			slab_err(s, page, "Attempt to free object(0x%p) "
				"outside of slab", object);
		} else if (!page->slab_cache) {
			printk(KERN_ERR
				"SLUB <none>: no slab for object 0x%p.\n",
						object);
			dump_stack();
		} else
			object_err(s, page, object,
					"page slab pointer corrupt.");
		goto fail;
	}

	if (s->flags & SLAB_STORE_USER)
		set_track(s, object, TRACK_FREE, addr);
	trace(s, page, object, 0);
	init_object(s, object, SLUB_RED_INACTIVE);
out:
	slab_unlock(page);
	/*
	 * n->list_lock is intentionally left held here; see the function
	 * comment.
	 */
	return n;

fail:
	slab_unlock(page);
	spin_unlock_irqrestore(&n->list_lock, *flags);
	slab_fix(s, "Object at 0x%p not freed", object);
	return NULL;
}
1125
/*
 * Parse the "slub_debug" boot parameter. @str is the text that follows the
 * parameter name.
 *
 * Accepted forms, as implemented below:
 *   slub_debug             all DEBUG_DEFAULT_FLAGS for every cache
 *   slub_debug=            same as above
 *   slub_debug=,<name>     DEBUG_DEFAULT_FLAGS, limited to caches whose
 *                          name starts with <name>
 *   slub_debug=o...        keep the defaults and set
 *                          disable_higher_order_debug
 *   slub_debug=-...        switch all debug flags off
 *   slub_debug=<letters>[,<name>]
 *                          f/z/p/u/t/a select SLAB_DEBUG_FREE,
 *                          SLAB_RED_ZONE, SLAB_POISON, SLAB_STORE_USER,
 *                          SLAB_TRACE and SLAB_FAILSLAB (case-insensitive);
 *                          unknown letters are reported and skipped
 *
 * Always returns 1.
 */
static int __init setup_slub_debug(char *str)
{
	slub_debug = DEBUG_DEFAULT_FLAGS;
	if (*str++ != '=' || !*str)
		/*
		 * No options specified: keep the defaults for all caches.
		 */
		goto out;

	if (*str == ',')
		/*
		 * No flag letters, only a cache name: keep the default flags
		 * and go record the name.
		 */
		goto check_slabs;

	if (tolower(*str) == 'o') {
		/*
		 * 'o': keep the default flags and set
		 * disable_higher_order_debug. Anything after it is ignored.
		 */
		disable_higher_order_debug = 1;
		goto out;
	}

	slub_debug = 0;
	if (*str == '-')
		/*
		 * '-': debugging is switched off.
		 */
		goto out;

	/*
	 * Collect the flags selected by the option letters, up to the end
	 * of the string or the ',' that introduces a cache name.
	 */
	for (; *str && *str != ','; str++) {
		switch (tolower(*str)) {
		case 'f':
			slub_debug |= SLAB_DEBUG_FREE;
			break;
		case 'z':
			slub_debug |= SLAB_RED_ZONE;
			break;
		case 'p':
			slub_debug |= SLAB_POISON;
			break;
		case 'u':
			slub_debug |= SLAB_STORE_USER;
			break;
		case 't':
			slub_debug |= SLAB_TRACE;
			break;
		case 'a':
			slub_debug |= SLAB_FAILSLAB;
			break;
		default:
			printk(KERN_ERR "slub_debug option '%c' "
				"unknown. skipped\n", *str);
		}
	}

check_slabs:
	/* Whatever follows the ',' is kept as the cache name prefix. */
	if (*str == ',')
		slub_debug_slabs = str + 1;
out:
	return 1;
}

__setup("slub_debug", setup_slub_debug);
1195
1196static unsigned long kmem_cache_flags(unsigned long object_size,
1197 unsigned long flags, const char *name,
1198 void (*ctor)(void *))
1199{
1200
1201
1202
1203 if (slub_debug && (!slub_debug_slabs ||
1204 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1205 flags |= slub_debug;
1206
1207 return flags;
1208}
1209#else
/*
 * Stand-ins used when CONFIG_SLUB_DEBUG is not set. Each mirrors the
 * signature of its debug counterpart and does nothing: checks report
 * success (1), counters read as 0, alloc_debug_processing() returns 0,
 * free_debug_processing() returns NULL, and kmem_cache_flags() passes the
 * requested flags through unchanged.
 */
static inline void setup_object_debug(struct kmem_cache *s,
			struct page *page, void *object) {}

static inline int alloc_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr) { return 0; }

static inline struct kmem_cache_node *free_debug_processing(
	struct kmem_cache *s, struct page *page, void *object,
	unsigned long addr, unsigned long *flags) { return NULL; }

static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
			{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
			void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
					struct page *page) {}
static inline void remove_full(struct kmem_cache *s, struct page *page) {}
static inline unsigned long kmem_cache_flags(unsigned long object_size,
	unsigned long flags, const char *name,
	void (*ctor)(void *))
{
	return flags;
}
/* With debugging compiled out the debug flag word is the constant 0. */
#define slub_debug 0

#define disable_higher_order_debug 0

static inline unsigned long slabs_node(struct kmem_cache *s, int node)
							{ return 0; }
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
							{ return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects) {}

/* The allocation/free hooks are no-ops too; the pre-alloc hook never fails. */
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
							{ return 0; }

static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
		void *object) {}

static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1253
1254#endif
1255
1256
1257
1258
1259static inline struct page *alloc_slab_page(gfp_t flags, int node,
1260 struct kmem_cache_order_objects oo)
1261{
1262 int order = oo_order(oo);
1263
1264 flags |= __GFP_NOTRACK;
1265
1266 if (node == NUMA_NO_NODE)
1267 return alloc_pages(flags, order);
1268 else
1269 return alloc_pages_exact_node(node, flags, order);
1270}
1271
/*
 * Allocate the backing pages for a new slab of cache @s.
 *
 * The preferred order (s->oo) is tried first with warnings and retries
 * suppressed; if that fails, the minimum order (s->min) is tried with the
 * caller's flags and, on success, counted as ORDER_FALLBACK.
 *
 * Local interrupts are enabled around the allocation when @flags contain
 * __GFP_WAIT and disabled again before returning.
 *
 * On success page->objects is set from the order actually used and the
 * zone's slab page counter is updated. Returns the page, or NULL if both
 * attempts failed.
 */
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
	struct page *page;
	struct kmem_cache_order_objects oo = s->oo;
	gfp_t alloc_gfp;

	flags &= gfp_allowed_mask;

	if (flags & __GFP_WAIT)
		local_irq_enable();

	flags |= s->allocflags;

	/*
	 * First attempt at the preferred order: do not warn, do not retry
	 * and never insist (__GFP_NOFAIL is stripped), because there is a
	 * fallback below.
	 */
	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;

	page = alloc_slab_page(alloc_gfp, node, oo);
	if (unlikely(!page)) {
		oo = s->min;
		/*
		 * Second attempt at the minimum order, this time with the
		 * caller's unmodified flags.
		 */
		page = alloc_slab_page(flags, node, oo);

		if (page)
			stat(s, ORDER_FALLBACK);
	}

	if (kmemcheck_enabled && page
		&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
		int pages = 1 << oo_order(oo);

		kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);

		/*
		 * The initial kmemcheck state of the pages depends on whether
		 * the cache has a constructor.
		 */
		if (s->ctor)
			kmemcheck_mark_uninitialized_pages(page, pages);
		else
			kmemcheck_mark_unallocated_pages(page, pages);
	}

	if (flags & __GFP_WAIT)
		local_irq_disable();
	if (!page)
		return NULL;

	page->objects = oo_objects(oo);
	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		1 << oo_order(oo));

	return page;
}
1333
/*
 * Prepare one object of a new slab: set up its debug state and then run
 * the cache's constructor, if it has one.
 */
static void setup_object(struct kmem_cache *s, struct page *page,
				void *object)
{
	setup_object_debug(s, page, object);
	if (unlikely(s->ctor))
		s->ctor(object);
}
1341
1342static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1343{
1344 struct page *page;
1345 void *start;
1346 void *last;
1347 void *p;
1348 int order;
1349
1350 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1351
1352 page = allocate_slab(s,
1353 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1354 if (!page)
1355 goto out;
1356
1357 order = compound_order(page);
1358 inc_slabs_node(s, page_to_nid(page), page->objects);
1359 memcg_bind_pages(s, order);
1360 page->slab_cache = s;
1361 __SetPageSlab(page);
1362 if (page->pfmemalloc)
1363 SetPageSlabPfmemalloc(page);
1364
1365 start = page_address(page);
1366
1367 if (unlikely(s->flags & SLAB_POISON))
1368 memset(start, POISON_INUSE, PAGE_SIZE << order);
1369
1370 last = start;
1371 for_each_object(p, s, start, page->objects) {
1372 setup_object(s, page, last);
1373 set_freepointer(s, last, p);
1374 last = p;
1375 }
1376 setup_object(s, page, last);
1377 set_freepointer(s, last, NULL);
1378
1379 page->freelist = start;
1380 page->inuse = page->objects;
1381 page->frozen = 1;
1382out:
1383 return page;
1384}
1385
/*
 * Hand the pages of slab @page back to the page allocator.
 *
 * For debug caches the slab padding and every object are verified one last
 * time. The page is then removed from the zone's slab accounting, stripped
 * of its slab flags and freed; the freed pages are credited to
 * current->reclaim_state when one is attached to the task.
 */
static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);
	int pages = 1 << order;

	if (kmem_cache_debug(s)) {
		void *p;

		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page),
						page->objects)
			check_object(s, page, p, SLUB_RED_INACTIVE);
	}

	kmemcheck_free_shadow(page, compound_order(page));

	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlabPfmemalloc(page);
	__ClearPageSlab(page);

	memcg_release_pages(s, order);
	reset_page_mapcount(page);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += pages;
	__free_memcg_kmem_pages(page, order);
}
1416
/*
 * True when page->lru is too small to hold a struct rcu_head. free_slab()
 * then places the rcu_head in the reserved area at the end of the slab
 * instead of overlaying it on page->lru.
 */
#define need_reserve_slab_rcu						\
	(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1419
1420static void rcu_free_slab(struct rcu_head *h)
1421{
1422 struct page *page;
1423
1424 if (need_reserve_slab_rcu)
1425 page = virt_to_head_page(h);
1426 else
1427 page = container_of((struct list_head *)h, struct page, lru);
1428
1429 __free_slab(page->slab_cache, page);
1430}
1431
1432static void free_slab(struct kmem_cache *s, struct page *page)
1433{
1434 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1435 struct rcu_head *head;
1436
1437 if (need_reserve_slab_rcu) {
1438 int order = compound_order(page);
1439 int offset = (PAGE_SIZE << order) - s->reserved;
1440
1441 VM_BUG_ON(s->reserved != sizeof(*head));
1442 head = page_address(page) + offset;
1443 } else {
1444
1445
1446
1447 head = (void *)&page->lru;
1448 }
1449
1450 call_rcu(head, rcu_free_slab);
1451 } else
1452 __free_slab(s, page);
1453}
1454
1455static void discard_slab(struct kmem_cache *s, struct page *page)
1456{
1457 dec_slabs_node(s, page_to_nid(page), page->objects);
1458 free_slab(s, page);
1459}
1460
1461
1462
1463
1464
1465
1466static inline void add_partial(struct kmem_cache_node *n,
1467 struct page *page, int tail)
1468{
1469 n->nr_partial++;
1470 if (tail == DEACTIVATE_TO_TAIL)
1471 list_add_tail(&page->lru, &n->partial);
1472 else
1473 list_add(&page->lru, &n->partial);
1474}
1475
1476
1477
1478
1479static inline void remove_partial(struct kmem_cache_node *n,
1480 struct page *page)
1481{
1482 list_del(&page->lru);
1483 n->nr_partial--;
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493
/*
 * Try to take ownership of a slab on the node partial list: set its frozen
 * bit with a cmpxchg on (freelist, counters) and unlink it from the list.
 *
 * @mode: non-zero means the caller takes all free objects; inuse is set to
 *        page->objects and the page freelist is cleared. Zero only freezes
 *        the page and leaves its freelist in place.
 *
 * Returns the freelist observed before the update, or NULL when the cmpxchg
 * failed. The caller in view (get_partial_node) holds n->list_lock.
 */
static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
		int mode)
{
	void *freelist;
	unsigned long counters;
	struct page new;

	/*
	 * Snapshot the current state. 'new' is a scratch struct page used
	 * only to compose the replacement counters word (inuse/frozen
	 * presumably overlay counters in struct page).
	 */
	freelist = page->freelist;
	counters = page->counters;
	new.counters = counters;
	if (mode) {
		new.inuse = page->objects;
		new.freelist = NULL;
	} else {
		new.freelist = freelist;
	}

	/* A slab found on the partial list is expected not to be frozen. */
	VM_BUG_ON(new.frozen);
	new.frozen = 1;

	if (!__cmpxchg_double_slab(s, page,
			freelist, counters,
			new.freelist, new.counters,
			"acquire_slab"))
		return NULL;

	remove_partial(n, page);
	WARN_ON(!freelist);
	return freelist;
}
1530
/* Forward declarations: both are used by get_partial_node() and defined later. */
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1533
1534
1535
1536
/*
 * Try to obtain objects from the partial list of node @n.
 *
 * The first slab acquired becomes the cpu slab (c->page) and its freelist is
 * the return value; further slabs are parked on the per-cpu partial list via
 * put_cpu_partial(). Returns NULL if nothing could be acquired.
 */
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
				struct kmem_cache_cpu *c, gfp_t flags)
{
	struct page *page, *page2;
	void *object = NULL;

	/*
	 * Unlocked peek at nr_partial: skip taking the list lock when there
	 * is no node or the list looks empty.
	 */
	if (!n || !n->nr_partial)
		return NULL;

	spin_lock(&n->list_lock);
	list_for_each_entry_safe(page, page2, &n->partial, lru) {
		void *t;
		int available;

		/* Skip pfmemalloc slabs this allocation is not entitled to. */
		if (!pfmemalloc_match(page, flags))
			continue;

		/* mode is 1 only for the first slab: take all its objects. */
		t = acquire_slab(s, n, page, object == NULL);
		if (!t)
			break;

		if (!object) {
			c->page = page;
			stat(s, ALLOC_FROM_PARTIAL);
			object = t;
			available = page->objects - page->inuse;
		} else {
			available = put_cpu_partial(s, page, 0);
			stat(s, CPU_PARTIAL_NODE);
		}
		/*
		 * Stop after one slab for debug caches, or once more than
		 * half of s->cpu_partial objects are available.
		 */
		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
			break;

	}
	spin_unlock(&n->list_lock);
	return object;
}
1580
1581
1582
1583
/*
 * Look for a partial slab on any node permitted by the zonelist and cpuset.
 * Only does work on CONFIG_NUMA builds; otherwise always returns NULL.
 *
 * Returns a freelist obtained through get_partial_node(), or NULL.
 */
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
		struct kmem_cache_cpu *c)
{
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *object;
	unsigned int cpuset_mems_cookie;

	/*
	 * Remote-node search is throttled: it is skipped entirely when the
	 * ratio is 0, and otherwise only attempted when the low 10 bits of
	 * the cycle counter do not exceed remote_node_defrag_ratio.
	 */
	if (!s->remote_node_defrag_ratio ||
			get_cycles() % 1024 > s->remote_node_defrag_ratio)
		return NULL;

	/*
	 * The loop repeats while put_mems_allowed() returns zero for the
	 * cookie (presumably: the cpuset's allowed memory changed during
	 * the scan - confirm against the cpuset API).
	 */
	do {
		cpuset_mems_cookie = get_mems_allowed();
		zonelist = node_zonelist(slab_node(), flags);
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
			struct kmem_cache_node *n;

			n = get_node(s, zone_to_nid(zone));

			/*
			 * Only raid nodes that have more than min_partial
			 * partial slabs.
			 */
			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
					n->nr_partial > s->min_partial) {
				object = get_partial_node(s, n, c, flags);
				if (object) {
					/*
					 * Success: the cookie result is
					 * deliberately ignored here, the
					 * object is returned regardless.
					 */
					put_mems_allowed(cpuset_mems_cookie);
					return object;
				}
			}
		}
	} while (!put_mems_allowed(cpuset_mems_cookie));
#endif
	return NULL;
}
1646
1647
1648
1649
1650static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1651 struct kmem_cache_cpu *c)
1652{
1653 void *object;
1654 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1655
1656 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1657 if (object || node != NUMA_NO_NODE)
1658 return object;
1659
1660 return get_any_partial(s, flags, c);
1661}
1662
/*
 * Transaction ids (tid) tag every per-cpu fastpath operation.
 *
 * With CONFIG_PREEMPT the tid advances in steps of the cpu count rounded up
 * to a power of two, so that "tid % TID_STEP" yields the cpu the tid was
 * initialised for and "tid / TID_STEP" the event count. Without preemption
 * the step is 1 and the tid is a plain counter.
 */
#ifdef CONFIG_PREEMPT
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
#define TID_STEP 1
#endif

/* Advance a tid by one event. */
static inline unsigned long next_tid(unsigned long tid)
{
	tid += TID_STEP;
	return tid;
}

/* Extract the cpu component of a tid. */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned long cpu = tid % TID_STEP;

	return cpu;
}

/* Extract the event counter component of a tid. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event = tid / TID_STEP;

	return event;
}

/* Initial tid for @cpu: event count zero, cpu component set to @cpu. */
static inline unsigned int init_tid(int cpu)
{
	unsigned int tid = cpu;

	return tid;
}
1697
/*
 * Account a failed per-cpu cmpxchg_double in the fastpath.
 *
 * @n:   name of the calling operation, used only in the debug message
 * @tid: the transaction id the caller sampled before the attempt
 *
 * The CMPXCHG_DOUBLE_CPU_FAIL statistic is always bumped. With
 * SLUB_DEBUG_CMPXCHG defined, a message classifying the failure is printed
 * too: cpu change (CONFIG_PREEMPT only), intervening event on the same cpu,
 * or unknown.
 */
static inline void note_cmpxchg_failure(const char *n,
		const struct kmem_cache *s, unsigned long tid)
{
#ifdef SLUB_DEBUG_CMPXCHG
	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);

	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);

#ifdef CONFIG_PREEMPT
	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
		printk("due to cpu change %d -> %d\n",
			tid_to_cpu(tid), tid_to_cpu(actual_tid));
	else
#endif
	/* Note: this 'if' is the else-branch of the one above under PREEMPT. */
	if (tid_to_event(tid) != tid_to_event(actual_tid))
		printk("due to cpu running other code. Event %ld->%ld\n",
			tid_to_event(tid), tid_to_event(actual_tid));
	else
		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
			actual_tid, tid, next_tid(tid));
#endif
	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
}
1721
1722static void init_kmem_cache_cpus(struct kmem_cache *s)
1723{
1724 int cpu;
1725
1726 for_each_possible_cpu(cpu)
1727 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1728}
1729
1730
1731
1732
/*
 * Retire a frozen cpu slab.
 *
 * @page:     the slab being given up (must be frozen)
 * @freelist: objects still held on the per-cpu freelist for this slab
 *
 * The per-cpu objects are pushed back onto the page freelist, the page is
 * unfrozen, and it is then put on the node partial list, put on the full
 * list (via add_full), or discarded when it is empty and the node already
 * has more than s->min_partial partial slabs.
 */
static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
{
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;
	/* l: list the page was placed on by a previous attempt; m: target now. */
	enum slab_modes l = M_NONE, m = M_NONE;
	void *nextfree;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;

	/*
	 * A non-empty page freelist on a frozen slab means objects were
	 * freed to it behind the cpu's back; such slabs go to the tail.
	 */
	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = DEACTIVATE_TO_TAIL;
	}

	/*
	 * Stage one: move the per-cpu objects to the page freelist one by
	 * one, but stop before the last object (the loop ends when the
	 * current object has no successor). The last one is handled in
	 * stage two together with the unfreeze.
	 */
	while (freelist && (nextfree = get_freepointer(s, freelist))) {
		void *prior;
		unsigned long counters;

		do {
			prior = page->freelist;
			counters = page->counters;
			set_freepointer(s, freelist, prior);
			new.counters = counters;
			new.inuse--;
			VM_BUG_ON(!new.frozen);

		} while (!__cmpxchg_double_slab(s, page,
			prior, counters,
			freelist, new.counters,
			"drain percpu freelist"));

		freelist = nextfree;
	}

	/*
	 * Stage two: unfreeze the slab with a single cmpxchg that also
	 * returns the last per-cpu object (if any). The list placement is
	 * done before the cmpxchg and is revised on every retry, because a
	 * concurrent update may change which list the slab belongs on.
	 */
redo:

	old.freelist = page->freelist;
	old.counters = page->counters;
	VM_BUG_ON(!old.frozen);

	/* Determine target state of the slab */
	new.counters = old.counters;
	if (freelist) {
		new.inuse--;
		set_freepointer(s, freelist, old.freelist);
		new.freelist = freelist;
	} else
		new.freelist = old.freelist;

	new.frozen = 0;

	if (!new.inuse && n->nr_partial > s->min_partial)
		m = M_FREE;
	else if (new.freelist) {
		m = M_PARTIAL;
		if (!lock) {
			lock = 1;
			/*
			 * Taken once and kept across retries; released
			 * after the cmpxchg finally succeeds.
			 */
			spin_lock(&n->list_lock);
		}
	} else {
		m = M_FULL;
		if (kmem_cache_debug(s) && !lock) {
			lock = 1;
			/*
			 * Full slabs only need the list lock for debug
			 * caches.
			 */
			spin_lock(&n->list_lock);
		}
	}

	if (l != m) {

		/* Undo the placement made by the previous attempt. */
		if (l == M_PARTIAL)

			remove_partial(n, page);

		else if (l == M_FULL)

			remove_full(s, page);

		if (m == M_PARTIAL) {

			add_partial(n, page, tail);
			/* tail doubles as the statistics item to bump. */
			stat(s, tail);

		} else if (m == M_FULL) {

			stat(s, DEACTIVATE_FULL);
			add_full(s, n, page);

		}
	}

	l = m;
	if (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"))
		goto redo;

	if (lock)
		spin_unlock(&n->list_lock);

	if (m == M_FREE) {
		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1873
1874
1875
1876
1877
1878
1879
1880
/*
 * Unfreeze every slab on the per-cpu partial list of @c and move it to its
 * node's partial list, or discard it if it is empty and the node already
 * holds more than s->min_partial partial slabs.
 *
 * The node list_lock is taken with plain spin_lock(); the caller in view
 * that is not itself an interrupt-context flush (put_cpu_partial) disables
 * interrupts around the call.
 */
static void unfreeze_partials(struct kmem_cache *s,
		struct kmem_cache_cpu *c)
{
	struct kmem_cache_node *n = NULL, *n2 = NULL;
	struct page *page, *discard_page = NULL;

	while ((page = c->partial)) {
		struct page new;
		struct page old;

		c->partial = page->next;

		/* Switch locks only when the slab belongs to another node. */
		n2 = get_node(s, page_to_nid(page));
		if (n != n2) {
			if (n)
				spin_unlock(&n->list_lock);

			n = n2;
			spin_lock(&n->list_lock);
		}

		/* Clear the frozen bit, leaving freelist and counts as-is. */
		do {

			old.freelist = page->freelist;
			old.counters = page->counters;
			VM_BUG_ON(!old.frozen);

			new.counters = old.counters;
			new.freelist = old.freelist;

			new.frozen = 0;

		} while (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"));

		if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
			/* Queue for freeing once the list lock is dropped. */
			page->next = discard_page;
			discard_page = page;
		} else {
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}

	if (n)
		spin_unlock(&n->list_lock);

	/* Free the empty slabs collected above, outside any list lock. */
	while (discard_page) {
		page = discard_page;
		discard_page = discard_page->next;

		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
/*
 * Push a frozen slab onto the head of this cpu's partial list.
 *
 * @drain: when non-zero and the list already accounts for more than
 *         s->cpu_partial free objects, the existing list is first moved to
 *         the node partial lists (unfreeze_partials).
 *
 * The head page carries the running totals for the whole list in
 * page->pages and page->pobjects. Returns the new pobjects total.
 */
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
	struct page *oldpage;
	int pages;
	int pobjects;

	do {
		pages = 0;
		pobjects = 0;
		oldpage = this_cpu_read(s->cpu_slab->partial);

		if (oldpage) {
			pobjects = oldpage->pobjects;
			pages = oldpage->pages;
			if (drain && pobjects > s->cpu_partial) {
				unsigned long flags;
				/*
				 * The list is over its budget: drain it with
				 * interrupts off, then start a fresh list.
				 */
				local_irq_save(flags);
				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
				local_irq_restore(flags);
				oldpage = NULL;
				pobjects = 0;
				pages = 0;
				stat(s, CPU_PARTIAL_DRAIN);
			}
		}

		pages++;
		pobjects += page->objects - page->inuse;

		page->pages = pages;
		page->pobjects = pobjects;
		page->next = oldpage;

		/* Retry if the list head changed since it was sampled. */
	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
	return pobjects;
}
1989
1990static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1991{
1992 stat(s, CPUSLAB_FLUSH);
1993 deactivate_slab(s, c->page, c->freelist);
1994
1995 c->tid = next_tid(c->tid);
1996 c->page = NULL;
1997 c->freelist = NULL;
1998}
1999
2000
2001
2002
2003
2004
2005static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2006{
2007 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2008
2009 if (likely(c)) {
2010 if (c->page)
2011 flush_slab(s, c);
2012
2013 unfreeze_partials(s, c);
2014 }
2015}
2016
/*
 * Cross-call handler used by flush_all(): @d is the kmem_cache to flush on
 * the cpu this runs on.
 */
static void flush_cpu_slab(void *d)
{
	__flush_cpu_slab(d, smp_processor_id());
}
2023
2024static bool has_cpu_slab(int cpu, void *info)
2025{
2026 struct kmem_cache *s = info;
2027 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2028
2029 return c->page || c->partial;
2030}
2031
2032static void flush_all(struct kmem_cache *s)
2033{
2034 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2035}
2036
2037
2038
2039
2040
/*
 * Check whether the cpu slab @page can serve an allocation for @node.
 *
 * Returns 1 when the page is acceptable and 0 otherwise. Without
 * CONFIG_NUMA every page matches.
 *
 * On NUMA builds a NULL @page never matches. The fastpath in
 * slab_alloc_node() samples c->freelist and c->page separately with
 * interrupts enabled, so it can observe a non-NULL freelist together with a
 * NULL page if the cpu slab is flushed in between. Rejecting NULL here sends
 * that case to the slow path instead of passing NULL to page_to_nid().
 */
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
	if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
		return 0;
#endif
	return 1;
}
2049
2050static int count_free(struct page *page)
2051{
2052 return page->objects - page->inuse;
2053}
2054
2055static unsigned long count_partial(struct kmem_cache_node *n,
2056 int (*get_count)(struct page *))
2057{
2058 unsigned long flags;
2059 unsigned long x = 0;
2060 struct page *page;
2061
2062 spin_lock_irqsave(&n->list_lock, flags);
2063 list_for_each_entry(page, &n->partial, lru)
2064 x += get_count(page);
2065 spin_unlock_irqrestore(&n->list_lock, flags);
2066 return x;
2067}
2068
/*
 * Total number of objects tracked for a node. Only maintained with
 * CONFIG_SLUB_DEBUG; reports 0 otherwise.
 */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
	unsigned long nr = 0;

#ifdef CONFIG_SLUB_DEBUG
	nr = atomic_long_read(&n->total_objects);
#endif
	return nr;
}
2077
2078static noinline void
2079slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2080{
2081 int node;
2082
2083 printk(KERN_WARNING
2084 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2085 nid, gfpflags);
2086 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2087 "default order: %d, min order: %d\n", s->name, s->object_size,
2088 s->size, oo_order(s->oo), oo_order(s->min));
2089
2090 if (oo_order(s->min) > get_order(s->object_size))
2091 printk(KERN_WARNING " %s debugging increased min order, use "
2092 "slub_debug=O to disable.\n", s->name);
2093
2094 for_each_online_node(node) {
2095 struct kmem_cache_node *n = get_node(s, node);
2096 unsigned long nr_slabs;
2097 unsigned long nr_objs;
2098 unsigned long nr_free;
2099
2100 if (!n)
2101 continue;
2102
2103 nr_free = count_partial(n, count_free);
2104 nr_slabs = node_nr_slabs(n);
2105 nr_objs = node_nr_objs(n);
2106
2107 printk(KERN_WARNING
2108 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2109 node, nr_slabs, nr_objs, nr_free);
2110 }
2111}
2112
2113static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2114 int node, struct kmem_cache_cpu **pc)
2115{
2116 void *freelist;
2117 struct kmem_cache_cpu *c = *pc;
2118 struct page *page;
2119
2120 freelist = get_partial(s, flags, node, c);
2121
2122 if (freelist)
2123 return freelist;
2124
2125 page = new_slab(s, flags, node);
2126 if (page) {
2127 c = __this_cpu_ptr(s->cpu_slab);
2128 if (c->page)
2129 flush_slab(s, c);
2130
2131
2132
2133
2134
2135 freelist = page->freelist;
2136 page->freelist = NULL;
2137
2138 stat(s, ALLOC_SLAB);
2139 c->page = page;
2140 *pc = c;
2141 } else
2142 freelist = NULL;
2143
2144 return freelist;
2145}
2146
2147static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2148{
2149 if (unlikely(PageSlabPfmemalloc(page)))
2150 return gfp_pfmemalloc_allowed(gfpflags);
2151
2152 return true;
2153}
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
/*
 * Take over the whole freelist of the (frozen) cpu slab @page.
 *
 * The page freelist is replaced by NULL and inuse is set to page->objects.
 * If the freelist was empty the page is unfrozen as part of the same
 * update (frozen becomes 0), so a NULL return means the caller no longer
 * owns the page.
 *
 * Returns the freelist that was on the page, possibly NULL.
 */
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
	struct page new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = page->freelist;
		counters = page->counters;

		/* 'new' is scratch space for building the new counters. */
		new.counters = counters;
		VM_BUG_ON(!new.frozen);

		new.inuse = page->objects;
		new.frozen = freelist != NULL;

	} while (!__cmpxchg_double_slab(s, page,
		freelist, counters,
		NULL, new.counters,
		"get_freelist"));

	return freelist;
}
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
/*
 * Allocation slow path, entered when the fastpath found no object on the
 * per-cpu freelist or the cpu slab does not match the requested node.
 *
 * Runs with interrupts disabled. In order it tries: the per-cpu freelist
 * again, the cpu slab's own page freelist, the per-cpu partial list, and
 * finally new_slab_objects() (node partial lists or a new slab).
 *
 * @addr: caller address, passed to alloc_debug_processing() for debug caches
 *
 * Returns one object, or NULL if no memory could be obtained.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void *freelist;
	struct page *page;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPT
	/*
	 * Re-fetch the per-cpu pointer now that interrupts are off;
	 * presumably the task may have changed cpus since the caller
	 * looked it up.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	page = c->page;
	if (!page)
		goto new_slab;
redo:

	/* Wrong node: give the slab up and look for another one. */
	if (unlikely(!node_match(page, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/*
	 * The cpu slab is a pfmemalloc slab this request may not use:
	 * give it up as well.
	 */
	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/* The per-cpu freelist may hold objects by now; use them first. */
	freelist = c->freelist;
	if (freelist)
		goto load_freelist;

	stat(s, ALLOC_SLOWPATH);

	/* Pick up whatever was freed to the page since it became ours. */
	freelist = get_freelist(s, page);

	if (!freelist) {
		/* get_freelist() unfroze the page; just forget about it. */
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
	}

	stat(s, ALLOC_REFILL);

load_freelist:
	/*
	 * Hand out the first object and install the remainder as the
	 * per-cpu freelist. The tid is advanced so that concurrent
	 * fastpath attempts based on the old state fail.
	 */
	VM_BUG_ON(!c->page->frozen);
	c->freelist = get_freepointer(s, freelist);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return freelist;

new_slab:

	/* Promote the next per-cpu partial slab to cpu slab and retry. */
	if (c->partial) {
		page = c->page = c->partial;
		c->partial = page->next;
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	/* May update c: new_slab_objects() re-reads the per-cpu pointer. */
	freelist = new_slab_objects(s, gfpflags, node, &c);

	if (unlikely(!freelist)) {
		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
			slab_out_of_memory(s, gfpflags, node);

		local_irq_restore(flags);
		return NULL;
	}

	page = c->page;
	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
		goto load_freelist;

	/* Only entered in the debug case */
	if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */

	/*
	 * Debug cache or pfmemalloc mismatch: return just this one object
	 * and do not keep the slab as cpu slab.
	 */
	deactivate_slab(s, page, get_freepointer(s, freelist));
	c->page = NULL;
	c->freelist = NULL;
	local_irq_restore(flags);
	return freelist;
}
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
/*
 * Allocate one object from cache @s, optionally restricted to @node.
 *
 * Fastpath: pop the first object from the per-cpu freelist with a single
 * this_cpu_cmpxchg_double() on (freelist, tid). No interrupt disabling is
 * done here; correctness relies on the tid changing whenever the per-cpu
 * state is modified. Everything else is delegated to __slab_alloc().
 *
 * @addr: caller address, forwarded to the slow path
 *
 * Returns the object (zeroed when __GFP_ZERO is set) or NULL.
 */
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	struct page *page;
	unsigned long tid;

	if (slab_pre_alloc_hook(s, gfpflags))
		return NULL;

	/* May substitute a different cache (memcg accounting). */
	s = memcg_kmem_get_cache(s, gfpflags);
redo:

	/*
	 * The per-cpu pointer is taken without pinning the task; a stale
	 * pointer is tolerated because the cmpxchg below operates on the
	 * current cpu's data and compares the tid.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * Sample the tid first. The compiler barrier keeps the freelist
	 * and page reads from being ordered before it.
	 */
	tid = c->tid;
	barrier();

	object = c->freelist;
	page = c->page;
	if (unlikely(!object || !node_match(page, node)))
		object = __slab_alloc(s, gfpflags, node, addr, c);

	else {
		void *next_object = get_freepointer_safe(s, object);

		/*
		 * Replace (object, tid) with (next_object, next tid) on this
		 * cpu. Fails, and we start over, if either the freelist
		 * head or the tid changed since they were sampled.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				next_object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		prefetch_freepointer(s, next_object);
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->object_size);

	slab_post_alloc_hook(s, gfpflags, object);

	return object;
}
2391
2392static __always_inline void *slab_alloc(struct kmem_cache *s,
2393 gfp_t gfpflags, unsigned long addr)
2394{
2395 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2396}
2397
2398void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2399{
2400 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2401
2402 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2403
2404 return ret;
2405}
2406EXPORT_SYMBOL(kmem_cache_alloc);
2407
2408#ifdef CONFIG_TRACING
2409void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2410{
2411 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2412 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2413 return ret;
2414}
2415EXPORT_SYMBOL(kmem_cache_alloc_trace);
2416
2417void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2418{
2419 void *ret = kmalloc_order(size, flags, order);
2420 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2421 return ret;
2422}
2423EXPORT_SYMBOL(kmalloc_order_trace);
2424#endif
2425
2426#ifdef CONFIG_NUMA
2427void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2428{
2429 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2430
2431 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2432 s->object_size, s->size, gfpflags, node);
2433
2434 return ret;
2435}
2436EXPORT_SYMBOL(kmem_cache_alloc_node);
2437
2438#ifdef CONFIG_TRACING
2439void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2440 gfp_t gfpflags,
2441 int node, size_t size)
2442{
2443 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2444
2445 trace_kmalloc_node(_RET_IP_, ret,
2446 size, s->size, gfpflags, node);
2447 return ret;
2448}
2449EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2450#endif
2451#endif
2452
2453
2454
2455
2456
2457
2458
2459
2460
/*
 * Free slow path: the object does not belong to the current cpu slab.
 *
 * The object is pushed onto the page freelist with a cmpxchg on
 * (freelist, counters). Depending on the resulting state the slab may be
 * frozen and put on the per-cpu partial list, moved between the node's
 * full and partial lists, or discarded.
 *
 * @x:    object being freed
 * @addr: caller address, passed to free_debug_processing()
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	/*
	 * For debug caches free_debug_processing() either rejects the free
	 * (NULL) or returns the node; judging by the unlock at the top of
	 * the loop below it returns with n->list_lock held and the irq
	 * state in flags.
	 */
	if (kmem_cache_debug(s) &&
		!(n = free_debug_processing(s, page, x, addr, &flags)))
		return;

	do {
		/* Drop a lock taken by a previous attempt (or by debug). */
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		/*
		 * List handling is only needed when the slab becomes empty
		 * or was full (no prior free object), and nobody has it
		 * frozen.
		 */
		if ((!new.inuse || !prior) && !was_frozen) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * Slab was full: freeze it so it can go on
				 * the per-cpu partial list without taking
				 * the list lock.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

	                        n = get_node(s, page_to_nid(page));
				/*
				 * Take the lock before the cmpxchg so the
				 * list operation after a successful update
				 * is already protected. If the cmpxchg
				 * fails the lock is dropped at the loop top.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * No list lock held: if this free froze the slab, park it
		 * on the per-cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The slab was already frozen by someone else; the list
		 * lock was not taken, so nothing more to do.
		 */
                if (was_frozen)
                        stat(s, FREE_FROZEN);
                return;
        }

	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects remain in the slab. For debug caches a previously full
	 * slab moves from the full list to the partial list.
	 */
	if (kmem_cache_debug(s) && unlikely(!prior)) {
		remove_full(s, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/*
		 * The slab had free objects before, so it is on the
		 * partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* Slab must be on the full list */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
/*
 * Free object @x, which lives in slab @page of cache @s.
 *
 * Fastpath: if @page is the current cpu slab, push the object onto the
 * per-cpu freelist with this_cpu_cmpxchg_double() on (freelist, tid),
 * retrying on failure. Otherwise fall back to __slab_free().
 *
 * @addr: caller address, forwarded to the slow path
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * As in the allocation fastpath, the per-cpu pointer is taken
	 * without pinning the task; the tid check in the cmpxchg catches
	 * any intervening change.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/* Sample the tid before looking at page and freelist. */
	tid = c->tid;
	barrier();

	if (likely(page == c->page)) {
		/* Link the object in front of the current freelist head. */
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);

}
2618
2619void kmem_cache_free(struct kmem_cache *s, void *x)
2620{
2621 s = cache_from_obj(s, x);
2622 if (!s)
2623 return;
2624 slab_free(s, virt_to_head_page(x), x, _RET_IP_);
2625 trace_kmem_cache_free(_RET_IP_, x);
2626}
2627EXPORT_SYMBOL(kmem_cache_free);
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
/*
 * Tunables for slab order selection:
 *  - slub_min_order:   lowest page order slab_order() will start from.
 *  - slub_max_order:   highest order calculate_order() tries before falling
 *                      back to single-object slabs.
 *  - slub_min_objects: desired minimum objects per slab; 0 lets
 *                      calculate_order() derive a value from nr_cpu_ids.
 */
static int slub_min_order;
static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static int slub_min_objects;

/*
 * Not referenced in this part of the file; by its name presumably a switch
 * that disables merging of slab caches - confirm at its users.
 */
static int slub_nomerge;
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683static inline int slab_order(int size, int min_objects,
2684 int max_order, int fract_leftover, int reserved)
2685{
2686 int order;
2687 int rem;
2688 int min_order = slub_min_order;
2689
2690 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2691 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2692
2693 for (order = max(min_order,
2694 fls(min_objects * size - 1) - PAGE_SHIFT);
2695 order <= max_order; order++) {
2696
2697 unsigned long slab_size = PAGE_SIZE << order;
2698
2699 if (slab_size < min_objects * size + reserved)
2700 continue;
2701
2702 rem = (slab_size - reserved) % size;
2703
2704 if (rem <= slab_size / fract_leftover)
2705 break;
2706
2707 }
2708
2709 return order;
2710}
2711
2712static inline int calculate_order(int size, int reserved)
2713{
2714 int order;
2715 int min_objects;
2716 int fraction;
2717 int max_objects;
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727 min_objects = slub_min_objects;
2728 if (!min_objects)
2729 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2730 max_objects = order_objects(slub_max_order, size, reserved);
2731 min_objects = min(min_objects, max_objects);
2732
2733 while (min_objects > 1) {
2734 fraction = 16;
2735 while (fraction >= 4) {
2736 order = slab_order(size, min_objects,
2737 slub_max_order, fraction, reserved);
2738 if (order <= slub_max_order)
2739 return order;
2740 fraction /= 2;
2741 }
2742 min_objects--;
2743 }
2744
2745
2746
2747
2748
2749 order = slab_order(size, 1, slub_max_order, 1, reserved);
2750 if (order <= slub_max_order)
2751 return order;
2752
2753
2754
2755
2756 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2757 if (order < MAX_ORDER)
2758 return order;
2759 return -ENOSYS;
2760}
2761
2762static void
2763init_kmem_cache_node(struct kmem_cache_node *n)
2764{
2765 n->nr_partial = 0;
2766 spin_lock_init(&n->list_lock);
2767 INIT_LIST_HEAD(&n->partial);
2768#ifdef CONFIG_SLUB_DEBUG
2769 atomic_long_set(&n->nr_slabs, 0);
2770 atomic_long_set(&n->total_objects, 0);
2771 INIT_LIST_HEAD(&n->full);
2772#endif
2773}
2774
2775static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2776{
2777 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2778 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2779
2780
2781
2782
2783
2784 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2785 2 * sizeof(void *));
2786
2787 if (!s->cpu_slab)
2788 return 0;
2789
2790 init_kmem_cache_cpus(s);
2791
2792 return 1;
2793}
2794
/* The cache from which all struct kmem_cache_node objects are allocated. */
static struct kmem_cache *kmem_cache_node;
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
/*
 * Bootstrap the kmem_cache_node structure for @node of the kmem_cache_node
 * cache itself. Called by init_kmem_cache_nodes() while slab_state is DOWN,
 * when the regular allocation path cannot be used yet, so the first object
 * is carved out of a new slab by hand.
 */
static void early_kmem_cache_node_alloc(int node)
{
	struct page *page;
	struct kmem_cache_node *n;

	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));

	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);

	BUG_ON(!page);
	/* The slab came from a different node: complain but carry on. */
	if (page_to_nid(page) != node) {
		printk(KERN_ERR "SLUB: Unable to allocate memory from "
				"node %d\n", node);
		printk(KERN_ERR "SLUB: Allocating a useless per node structure "
				"in order to be able to continue\n");
	}

	/* Take the first free object and account for it manually. */
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse = 1;
	page->frozen = 0;
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
	init_tracking(kmem_cache_node, n);
#endif
	init_kmem_cache_node(n);
	inc_slabs_node(kmem_cache_node, node, page->objects);

	/* The rest of the slab stays available on the partial list. */
	add_partial(n, page, DEACTIVATE_TO_HEAD);
}
2838
2839static void free_kmem_cache_nodes(struct kmem_cache *s)
2840{
2841 int node;
2842
2843 for_each_node_state(node, N_NORMAL_MEMORY) {
2844 struct kmem_cache_node *n = s->node[node];
2845
2846 if (n)
2847 kmem_cache_free(kmem_cache_node, n);
2848
2849 s->node[node] = NULL;
2850 }
2851}
2852
2853static int init_kmem_cache_nodes(struct kmem_cache *s)
2854{
2855 int node;
2856
2857 for_each_node_state(node, N_NORMAL_MEMORY) {
2858 struct kmem_cache_node *n;
2859
2860 if (slab_state == DOWN) {
2861 early_kmem_cache_node_alloc(node);
2862 continue;
2863 }
2864 n = kmem_cache_alloc_node(kmem_cache_node,
2865 GFP_KERNEL, node);
2866
2867 if (!n) {
2868 free_kmem_cache_nodes(s);
2869 return 0;
2870 }
2871
2872 s->node[node] = n;
2873 init_kmem_cache_node(n);
2874 }
2875 return 1;
2876}
2877
2878static void set_min_partial(struct kmem_cache *s, unsigned long min)
2879{
2880 if (min < MIN_PARTIAL)
2881 min = MIN_PARTIAL;
2882 else if (min > MAX_PARTIAL)
2883 min = MAX_PARTIAL;
2884 s->min_partial = min;
2885}
2886
2887
2888
2889
2890
/*
 * Compute the object layout and slab order of cache @s from its flags,
 * object_size, align and reserved fields.
 *
 * @forced_order: page order to use, or a negative value to let
 *                calculate_order() choose.
 *
 * Sets s->inuse, s->offset (when the free pointer is moved), s->size,
 * s->allocflags, s->oo, s->min and possibly s->max.
 *
 * Returns non-zero if the resulting slab holds at least one object, 0 on
 * failure.
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	unsigned long flags = s->flags;
	unsigned long size = s->object_size;
	int order;

	/* Round the object up to a whole number of pointer-sized words. */
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Mark the cache for object poisoning only when poisoning is
	 * requested and neither RCU-delayed freeing nor a constructor is
	 * in use.
	 */
	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;


	/*
	 * With red zoning, add one word after the object if the alignment
	 * above did not already leave any padding there.
	 */
	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
		size += sizeof(void *);
#endif

	/* Everything up to here counts as the in-use part of the object. */
	s->inuse = size;

	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * For RCU, poisoned or constructed objects, s->offset is
		 * set past the object and one extra word is added there
		 * (presumably so the free pointer does not overlay object
		 * contents - confirm in get_freepointer/set_freepointer).
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	if (flags & SLAB_STORE_USER)
		/* Room for two struct track records per object. */
		size += 2 * sizeof(struct track);

	if (flags & SLAB_RED_ZONE)
		/* One more word of padding at the end of the object. */
		size += sizeof(void *);
#endif

	/* Round the total up to the cache's alignment. */
	size = ALIGN(size, s->align);
	s->size = size;
	if (forced_order >= 0)
		order = forced_order;
	else
		order = calculate_order(size, s->reserved);

	if (order < 0)
		return 0;

	s->allocflags = 0;
	if (order)
		s->allocflags |= __GFP_COMP;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= SLUB_DMA;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * oo: preferred order and object count; min: the smallest order
	 * that fits one object; max: the largest oo seen so far.
	 */
	s->oo = oo_make(order, size, s->reserved);
	s->min = oo_make(get_order(size), size, s->reserved);
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;

	return !!oo_objects(s->oo);
}
3000
/*
 * Finish setting up cache @s: resolve flags, compute sizes, choose the
 * partial-list limits and allocate the per-node and per-cpu structures.
 *
 * Returns 0 on success and -EINVAL on failure. If @flags contains
 * SLAB_PANIC a failure panics instead of returning.
 */
static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
	s->reserved = 0;

	/* Reserve room at the end of each slab for the rcu_head if needed. */
	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
		s->reserved = sizeof(struct rcu_head);

	if (!calculate_sizes(s, -1))
		goto error;
	if (disable_higher_order_debug) {
		/*
		 * If the debug metadata pushed the object into a higher
		 * page order, drop the metadata flags and recompute.
		 */
		if (get_order(s->size) > get_order(s->object_size)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}

#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
		/* Enable fast mode */
		s->flags |= __CMPXCHG_DOUBLE;
#endif

	/*
	 * min_partial scales with the log of the object size and is clamped
	 * by set_min_partial().
	 */
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * cpu_partial bounds the number of free objects kept on the
	 * per-cpu partial list (see put_cpu_partial()). Debug caches get
	 * none; otherwise smaller objects get a larger budget.
	 */
	if (kmem_cache_debug(s))
		s->cpu_partial = 0;
	else if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;

#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif
	if (!init_kmem_cache_nodes(s))
		goto error;

	if (alloc_kmem_cache_cpus(s))
		return 0;

	/* Per-cpu allocation failed: undo the per-node allocations. */
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
			s->offset, flags);
	return -EINVAL;
}
3083
/*
 * Report @page via slab_err() using @text as the format (with the cache name
 * as its argument) and then print every object of the slab whose bit is not
 * set in the map filled by get_map(), together with its tracking data.
 *
 * Compiled to an empty function without CONFIG_SLUB_DEBUG.  Silently does
 * nothing if the temporary bitmap cannot be allocated.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	void *obj;
	unsigned long *map;

	/* One bit per object in the slab; GFP_ATOMIC so we never sleep here. */
	map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long), GFP_ATOMIC);
	if (!map)
		return;

	slab_err(s, page, text, s->name);
	slab_lock(page);

	get_map(s, page, map);
	for_each_object(obj, s, base, page->objects) {
		if (test_bit(slab_index(obj, s, base), map))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
							obj, obj - base);
		print_tracking(s, obj);
	}

	slab_unlock(page);
	kfree(map);
#endif
}
3110
3111
3112
3113
3114
3115
3116static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3117{
3118 struct page *page, *h;
3119
3120 list_for_each_entry_safe(page, h, &n->partial, lru) {
3121 if (!page->inuse) {
3122 remove_partial(n, page);
3123 discard_slab(s, page);
3124 } else {
3125 list_slab_objects(s, page,
3126 "Objects remaining in %s on kmem_cache_close()");
3127 }
3128 }
3129}
3130
3131
3132
3133
3134static inline int kmem_cache_close(struct kmem_cache *s)
3135{
3136 int node;
3137
3138 flush_all(s);
3139
3140 for_each_node_state(node, N_NORMAL_MEMORY) {
3141 struct kmem_cache_node *n = get_node(s, node);
3142
3143 free_partial(s, n);
3144 if (n->nr_partial || slabs_node(s, node))
3145 return 1;
3146 }
3147 free_percpu(s->cpu_slab);
3148 free_kmem_cache_nodes(s);
3149 return 0;
3150}
3151
3152int __kmem_cache_shutdown(struct kmem_cache *s)
3153{
3154 int rc = kmem_cache_close(s);
3155
3156 if (!rc) {
3157
3158
3159
3160
3161
3162
3163
3164
3165 mutex_unlock(&slab_mutex);
3166 sysfs_slab_remove(s);
3167 mutex_lock(&slab_mutex);
3168 }
3169
3170 return rc;
3171}
3172
3173
3174
3175
3176
3177struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
3178EXPORT_SYMBOL(kmalloc_caches);
3179
3180#ifdef CONFIG_ZONE_DMA
3181static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
3182#endif
3183
3184static int __init setup_slub_min_order(char *str)
3185{
3186 get_option(&str, &slub_min_order);
3187
3188 return 1;
3189}
3190
3191__setup("slub_min_order=", setup_slub_min_order);
3192
3193static int __init setup_slub_max_order(char *str)
3194{
3195 get_option(&str, &slub_max_order);
3196 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3197
3198 return 1;
3199}
3200
3201__setup("slub_max_order=", setup_slub_max_order);
3202
3203static int __init setup_slub_min_objects(char *str)
3204{
3205 get_option(&str, &slub_min_objects);
3206
3207 return 1;
3208}
3209
3210__setup("slub_min_objects=", setup_slub_min_objects);
3211
3212static int __init setup_slub_nomerge(char *str)
3213{
3214 slub_nomerge = 1;
3215 return 1;
3216}
3217
3218__setup("slub_nomerge", setup_slub_nomerge);
3219
3220
3221
3222
3223
3224
3225
3226static s8 size_index[24] = {
3227 3,
3228 4,
3229 5,
3230 5,
3231 6,
3232 6,
3233 6,
3234 6,
3235 1,
3236 1,
3237 1,
3238 1,
3239 7,
3240 7,
3241 7,
3242 7,
3243 2,
3244 2,
3245 2,
3246 2,
3247 2,
3248 2,
3249 2,
3250 2
3251};
3252
/*
 * Position in size_index[] for a request of @bytes bytes: sizes 1..8 map to
 * element 0, 9..16 to element 1, and so on.  @bytes must not be 0.
 */
static inline int size_index_elem(size_t bytes)
{
	/* Unsigned division by 8, written as a shift. */
	return (bytes - 1) >> 3;
}
3257
3258static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3259{
3260 int index;
3261
3262 if (size <= 192) {
3263 if (!size)
3264 return ZERO_SIZE_PTR;
3265
3266 index = size_index[size_index_elem(size)];
3267 } else
3268 index = fls(size - 1);
3269
3270#ifdef CONFIG_ZONE_DMA
3271 if (unlikely((flags & SLUB_DMA)))
3272 return kmalloc_dma_caches[index];
3273
3274#endif
3275 return kmalloc_caches[index];
3276}
3277
3278void *__kmalloc(size_t size, gfp_t flags)
3279{
3280 struct kmem_cache *s;
3281 void *ret;
3282
3283 if (unlikely(size > SLUB_MAX_SIZE))
3284 return kmalloc_large(size, flags);
3285
3286 s = get_slab(size, flags);
3287
3288 if (unlikely(ZERO_OR_NULL_PTR(s)))
3289 return s;
3290
3291 ret = slab_alloc(s, flags, _RET_IP_);
3292
3293 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3294
3295 return ret;
3296}
3297EXPORT_SYMBOL(__kmalloc);
3298
3299#ifdef CONFIG_NUMA
3300static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3301{
3302 struct page *page;
3303 void *ptr = NULL;
3304
3305 flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
3306 page = alloc_pages_node(node, flags, get_order(size));
3307 if (page)
3308 ptr = page_address(page);
3309
3310 kmemleak_alloc(ptr, size, 1, flags);
3311 return ptr;
3312}
3313
3314void *__kmalloc_node(size_t size, gfp_t flags, int node)
3315{
3316 struct kmem_cache *s;
3317 void *ret;
3318
3319 if (unlikely(size > SLUB_MAX_SIZE)) {
3320 ret = kmalloc_large_node(size, flags, node);
3321
3322 trace_kmalloc_node(_RET_IP_, ret,
3323 size, PAGE_SIZE << get_order(size),
3324 flags, node);
3325
3326 return ret;
3327 }
3328
3329 s = get_slab(size, flags);
3330
3331 if (unlikely(ZERO_OR_NULL_PTR(s)))
3332 return s;
3333
3334 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3335
3336 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3337
3338 return ret;
3339}
3340EXPORT_SYMBOL(__kmalloc_node);
3341#endif
3342
3343size_t ksize(const void *object)
3344{
3345 struct page *page;
3346
3347 if (unlikely(object == ZERO_SIZE_PTR))
3348 return 0;
3349
3350 page = virt_to_head_page(object);
3351
3352 if (unlikely(!PageSlab(page))) {
3353 WARN_ON(!PageCompound(page));
3354 return PAGE_SIZE << compound_order(page);
3355 }
3356
3357 return slab_ksize(page->slab_cache);
3358}
3359EXPORT_SYMBOL(ksize);
3360
3361#ifdef CONFIG_SLUB_DEBUG
3362bool verify_mem_not_deleted(const void *x)
3363{
3364 struct page *page;
3365 void *object = (void *)x;
3366 unsigned long flags;
3367 bool rv;
3368
3369 if (unlikely(ZERO_OR_NULL_PTR(x)))
3370 return false;
3371
3372 local_irq_save(flags);
3373
3374 page = virt_to_head_page(x);
3375 if (unlikely(!PageSlab(page))) {
3376
3377 rv = true;
3378 goto out_unlock;
3379 }
3380
3381 slab_lock(page);
3382 if (on_freelist(page->slab_cache, page, object)) {
3383 object_err(page->slab_cache, page, object, "Object is on free-list");
3384 rv = false;
3385 } else {
3386 rv = true;
3387 }
3388 slab_unlock(page);
3389
3390out_unlock:
3391 local_irq_restore(flags);
3392 return rv;
3393}
3394EXPORT_SYMBOL(verify_mem_not_deleted);
3395#endif
3396
3397void kfree(const void *x)
3398{
3399 struct page *page;
3400 void *object = (void *)x;
3401
3402 trace_kfree(_RET_IP_, x);
3403
3404 if (unlikely(ZERO_OR_NULL_PTR(x)))
3405 return;
3406
3407 page = virt_to_head_page(x);
3408 if (unlikely(!PageSlab(page))) {
3409 BUG_ON(!PageCompound(page));
3410 kmemleak_free(x);
3411 __free_memcg_kmem_pages(page, compound_order(page));
3412 return;
3413 }
3414 slab_free(page->slab_cache, page, object, _RET_IP_);
3415}
3416EXPORT_SYMBOL(kfree);
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428int kmem_cache_shrink(struct kmem_cache *s)
3429{
3430 int node;
3431 int i;
3432 struct kmem_cache_node *n;
3433 struct page *page;
3434 struct page *t;
3435 int objects = oo_objects(s->max);
3436 struct list_head *slabs_by_inuse =
3437 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3438 unsigned long flags;
3439
3440 if (!slabs_by_inuse)
3441 return -ENOMEM;
3442
3443 flush_all(s);
3444 for_each_node_state(node, N_NORMAL_MEMORY) {
3445 n = get_node(s, node);
3446
3447 if (!n->nr_partial)
3448 continue;
3449
3450 for (i = 0; i < objects; i++)
3451 INIT_LIST_HEAD(slabs_by_inuse + i);
3452
3453 spin_lock_irqsave(&n->list_lock, flags);
3454
3455
3456
3457
3458
3459
3460
3461 list_for_each_entry_safe(page, t, &n->partial, lru) {
3462 list_move(&page->lru, slabs_by_inuse + page->inuse);
3463 if (!page->inuse)
3464 n->nr_partial--;
3465 }
3466
3467
3468
3469
3470
3471 for (i = objects - 1; i > 0; i--)
3472 list_splice(slabs_by_inuse + i, n->partial.prev);
3473
3474 spin_unlock_irqrestore(&n->list_lock, flags);
3475
3476
3477 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3478 discard_slab(s, page);
3479 }
3480
3481 kfree(slabs_by_inuse);
3482 return 0;
3483}
3484EXPORT_SYMBOL(kmem_cache_shrink);
3485
3486#if defined(CONFIG_MEMORY_HOTPLUG)
3487static int slab_mem_going_offline_callback(void *arg)
3488{
3489 struct kmem_cache *s;
3490
3491 mutex_lock(&slab_mutex);
3492 list_for_each_entry(s, &slab_caches, list)
3493 kmem_cache_shrink(s);
3494 mutex_unlock(&slab_mutex);
3495
3496 return 0;
3497}
3498
3499static void slab_mem_offline_callback(void *arg)
3500{
3501 struct kmem_cache_node *n;
3502 struct kmem_cache *s;
3503 struct memory_notify *marg = arg;
3504 int offline_node;
3505
3506 offline_node = marg->status_change_nid_normal;
3507
3508
3509
3510
3511
3512 if (offline_node < 0)
3513 return;
3514
3515 mutex_lock(&slab_mutex);
3516 list_for_each_entry(s, &slab_caches, list) {
3517 n = get_node(s, offline_node);
3518 if (n) {
3519
3520
3521
3522
3523
3524
3525 BUG_ON(slabs_node(s, offline_node));
3526
3527 s->node[offline_node] = NULL;
3528 kmem_cache_free(kmem_cache_node, n);
3529 }
3530 }
3531 mutex_unlock(&slab_mutex);
3532}
3533
3534static int slab_mem_going_online_callback(void *arg)
3535{
3536 struct kmem_cache_node *n;
3537 struct kmem_cache *s;
3538 struct memory_notify *marg = arg;
3539 int nid = marg->status_change_nid_normal;
3540 int ret = 0;
3541
3542
3543
3544
3545
3546 if (nid < 0)
3547 return 0;
3548
3549
3550
3551
3552
3553
3554 mutex_lock(&slab_mutex);
3555 list_for_each_entry(s, &slab_caches, list) {
3556
3557
3558
3559
3560
3561 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3562 if (!n) {
3563 ret = -ENOMEM;
3564 goto out;
3565 }
3566 init_kmem_cache_node(n);
3567 s->node[nid] = n;
3568 }
3569out:
3570 mutex_unlock(&slab_mutex);
3571 return ret;
3572}
3573
3574static int slab_memory_callback(struct notifier_block *self,
3575 unsigned long action, void *arg)
3576{
3577 int ret = 0;
3578
3579 switch (action) {
3580 case MEM_GOING_ONLINE:
3581 ret = slab_mem_going_online_callback(arg);
3582 break;
3583 case MEM_GOING_OFFLINE:
3584 ret = slab_mem_going_offline_callback(arg);
3585 break;
3586 case MEM_OFFLINE:
3587 case MEM_CANCEL_ONLINE:
3588 slab_mem_offline_callback(arg);
3589 break;
3590 case MEM_ONLINE:
3591 case MEM_CANCEL_OFFLINE:
3592 break;
3593 }
3594 if (ret)
3595 ret = notifier_from_errno(ret);
3596 else
3597 ret = NOTIFY_OK;
3598 return ret;
3599}
3600
3601#endif
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3614{
3615 int node;
3616 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3617
3618 memcpy(s, static_cache, kmem_cache->object_size);
3619
3620 for_each_node_state(node, N_NORMAL_MEMORY) {
3621 struct kmem_cache_node *n = get_node(s, node);
3622 struct page *p;
3623
3624 if (n) {
3625 list_for_each_entry(p, &n->partial, lru)
3626 p->slab_cache = s;
3627
3628#ifdef CONFIG_SLUB_DEBUG
3629 list_for_each_entry(p, &n->full, lru)
3630 p->slab_cache = s;
3631#endif
3632 }
3633 }
3634 list_add(&s->list, &slab_caches);
3635 return s;
3636}
3637
3638void __init kmem_cache_init(void)
3639{
3640 static __initdata struct kmem_cache boot_kmem_cache,
3641 boot_kmem_cache_node;
3642 int i;
3643 int caches = 2;
3644
3645 if (debug_guardpage_minorder())
3646 slub_max_order = 0;
3647
3648 kmem_cache_node = &boot_kmem_cache_node;
3649 kmem_cache = &boot_kmem_cache;
3650
3651 create_boot_cache(kmem_cache_node, "kmem_cache_node",
3652 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
3653
3654 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3655
3656
3657 slab_state = PARTIAL;
3658
3659 create_boot_cache(kmem_cache, "kmem_cache",
3660 offsetof(struct kmem_cache, node) +
3661 nr_node_ids * sizeof(struct kmem_cache_node *),
3662 SLAB_HWCACHE_ALIGN);
3663
3664 kmem_cache = bootstrap(&boot_kmem_cache);
3665
3666
3667
3668
3669
3670
3671 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3687 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3688
3689 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3690 int elem = size_index_elem(i);
3691 if (elem >= ARRAY_SIZE(size_index))
3692 break;
3693 size_index[elem] = KMALLOC_SHIFT_LOW;
3694 }
3695
3696 if (KMALLOC_MIN_SIZE == 64) {
3697
3698
3699
3700
3701 for (i = 64 + 8; i <= 96; i += 8)
3702 size_index[size_index_elem(i)] = 7;
3703 } else if (KMALLOC_MIN_SIZE == 128) {
3704
3705
3706
3707
3708
3709 for (i = 128 + 8; i <= 192; i += 8)
3710 size_index[size_index_elem(i)] = 8;
3711 }
3712
3713
3714 if (KMALLOC_MIN_SIZE <= 32) {
3715 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3716 caches++;
3717 }
3718
3719 if (KMALLOC_MIN_SIZE <= 64) {
3720 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3721 caches++;
3722 }
3723
3724 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3725 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3726 caches++;
3727 }
3728
3729 slab_state = UP;
3730
3731
3732 if (KMALLOC_MIN_SIZE <= 32) {
3733 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3734 BUG_ON(!kmalloc_caches[1]->name);
3735 }
3736
3737 if (KMALLOC_MIN_SIZE <= 64) {
3738 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3739 BUG_ON(!kmalloc_caches[2]->name);
3740 }
3741
3742 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3743 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3744
3745 BUG_ON(!s);
3746 kmalloc_caches[i]->name = s;
3747 }
3748
3749#ifdef CONFIG_SMP
3750 register_cpu_notifier(&slab_notifier);
3751#endif
3752
3753#ifdef CONFIG_ZONE_DMA
3754 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3755 struct kmem_cache *s = kmalloc_caches[i];
3756
3757 if (s && s->size) {
3758 char *name = kasprintf(GFP_NOWAIT,
3759 "dma-kmalloc-%d", s->object_size);
3760
3761 BUG_ON(!name);
3762 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3763 s->object_size, SLAB_CACHE_DMA);
3764 }
3765 }
3766#endif
3767 printk(KERN_INFO
3768 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3769 " CPUs=%d, Nodes=%d\n",
3770 caches, cache_line_size(),
3771 slub_min_order, slub_max_order, slub_min_objects,
3772 nr_cpu_ids, nr_node_ids);
3773}
3774
3775void __init kmem_cache_init_late(void)
3776{
3777}
3778
3779
3780
3781
3782static int slab_unmergeable(struct kmem_cache *s)
3783{
3784 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3785 return 1;
3786
3787 if (s->ctor)
3788 return 1;
3789
3790
3791
3792
3793 if (s->refcount < 0)
3794 return 1;
3795
3796 return 0;
3797}
3798
3799static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
3800 size_t align, unsigned long flags, const char *name,
3801 void (*ctor)(void *))
3802{
3803 struct kmem_cache *s;
3804
3805 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3806 return NULL;
3807
3808 if (ctor)
3809 return NULL;
3810
3811 size = ALIGN(size, sizeof(void *));
3812 align = calculate_alignment(flags, align, size);
3813 size = ALIGN(size, align);
3814 flags = kmem_cache_flags(size, flags, name, NULL);
3815
3816 list_for_each_entry(s, &slab_caches, list) {
3817 if (slab_unmergeable(s))
3818 continue;
3819
3820 if (size > s->size)
3821 continue;
3822
3823 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3824 continue;
3825
3826
3827
3828
3829 if ((s->size & ~(align - 1)) != s->size)
3830 continue;
3831
3832 if (s->size - size >= sizeof(void *))
3833 continue;
3834
3835 if (!cache_match_memcg(s, memcg))
3836 continue;
3837
3838 return s;
3839 }
3840 return NULL;
3841}
3842
3843struct kmem_cache *
3844__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
3845 size_t align, unsigned long flags, void (*ctor)(void *))
3846{
3847 struct kmem_cache *s;
3848
3849 s = find_mergeable(memcg, size, align, flags, name, ctor);
3850 if (s) {
3851 s->refcount++;
3852
3853
3854
3855
3856 s->object_size = max(s->object_size, (int)size);
3857 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3858
3859 if (sysfs_slab_alias(s, name)) {
3860 s->refcount--;
3861 s = NULL;
3862 }
3863 }
3864
3865 return s;
3866}
3867
3868int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
3869{
3870 int err;
3871
3872 err = kmem_cache_open(s, flags);
3873 if (err)
3874 return err;
3875
3876
3877 if (slab_state <= UP)
3878 return 0;
3879
3880 memcg_propagate_slab_attrs(s);
3881 mutex_unlock(&slab_mutex);
3882 err = sysfs_slab_add(s);
3883 mutex_lock(&slab_mutex);
3884
3885 if (err)
3886 kmem_cache_close(s);
3887
3888 return err;
3889}
3890
3891#ifdef CONFIG_SMP
3892
3893
3894
3895
3896static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3897 unsigned long action, void *hcpu)
3898{
3899 long cpu = (long)hcpu;
3900 struct kmem_cache *s;
3901 unsigned long flags;
3902
3903 switch (action) {
3904 case CPU_UP_CANCELED:
3905 case CPU_UP_CANCELED_FROZEN:
3906 case CPU_DEAD:
3907 case CPU_DEAD_FROZEN:
3908 mutex_lock(&slab_mutex);
3909 list_for_each_entry(s, &slab_caches, list) {
3910 local_irq_save(flags);
3911 __flush_cpu_slab(s, cpu);
3912 local_irq_restore(flags);
3913 }
3914 mutex_unlock(&slab_mutex);
3915 break;
3916 default:
3917 break;
3918 }
3919 return NOTIFY_OK;
3920}
3921
3922static struct notifier_block __cpuinitdata slab_notifier = {
3923 .notifier_call = slab_cpuup_callback
3924};
3925
3926#endif
3927
3928void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3929{
3930 struct kmem_cache *s;
3931 void *ret;
3932
3933 if (unlikely(size > SLUB_MAX_SIZE))
3934 return kmalloc_large(size, gfpflags);
3935
3936 s = get_slab(size, gfpflags);
3937
3938 if (unlikely(ZERO_OR_NULL_PTR(s)))
3939 return s;
3940
3941 ret = slab_alloc(s, gfpflags, caller);
3942
3943
3944 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3945
3946 return ret;
3947}
3948
3949#ifdef CONFIG_NUMA
3950void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3951 int node, unsigned long caller)
3952{
3953 struct kmem_cache *s;
3954 void *ret;
3955
3956 if (unlikely(size > SLUB_MAX_SIZE)) {
3957 ret = kmalloc_large_node(size, gfpflags, node);
3958
3959 trace_kmalloc_node(caller, ret,
3960 size, PAGE_SIZE << get_order(size),
3961 gfpflags, node);
3962
3963 return ret;
3964 }
3965
3966 s = get_slab(size, gfpflags);
3967
3968 if (unlikely(ZERO_OR_NULL_PTR(s)))
3969 return s;
3970
3971 ret = slab_alloc_node(s, gfpflags, node, caller);
3972
3973
3974 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3975
3976 return ret;
3977}
3978#endif
3979
3980#ifdef CONFIG_SYSFS
3981static int count_inuse(struct page *page)
3982{
3983 return page->inuse;
3984}
3985
3986static int count_total(struct page *page)
3987{
3988 return page->objects;
3989}
3990#endif
3991
3992#ifdef CONFIG_SLUB_DEBUG
3993static int validate_slab(struct kmem_cache *s, struct page *page,
3994 unsigned long *map)
3995{
3996 void *p;
3997 void *addr = page_address(page);
3998
3999 if (!check_slab(s, page) ||
4000 !on_freelist(s, page, NULL))
4001 return 0;
4002
4003
4004 bitmap_zero(map, page->objects);
4005
4006 get_map(s, page, map);
4007 for_each_object(p, s, addr, page->objects) {
4008 if (test_bit(slab_index(p, s, addr), map))
4009 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4010 return 0;
4011 }
4012
4013 for_each_object(p, s, addr, page->objects)
4014 if (!test_bit(slab_index(p, s, addr), map))
4015 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4016 return 0;
4017 return 1;
4018}
4019
/*
 * Run validate_slab() on @page with the slab lock held.  The validation
 * result is discarded.
 */
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	slab_lock(page);
	(void)validate_slab(s, page, map);
	slab_unlock(page);
}
4027
4028static int validate_slab_node(struct kmem_cache *s,
4029 struct kmem_cache_node *n, unsigned long *map)
4030{
4031 unsigned long count = 0;
4032 struct page *page;
4033 unsigned long flags;
4034
4035 spin_lock_irqsave(&n->list_lock, flags);
4036
4037 list_for_each_entry(page, &n->partial, lru) {
4038 validate_slab_slab(s, page, map);
4039 count++;
4040 }
4041 if (count != n->nr_partial)
4042 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4043 "counter=%ld\n", s->name, count, n->nr_partial);
4044
4045 if (!(s->flags & SLAB_STORE_USER))
4046 goto out;
4047
4048 list_for_each_entry(page, &n->full, lru) {
4049 validate_slab_slab(s, page, map);
4050 count++;
4051 }
4052 if (count != atomic_long_read(&n->nr_slabs))
4053 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4054 "counter=%ld\n", s->name, count,
4055 atomic_long_read(&n->nr_slabs));
4056
4057out:
4058 spin_unlock_irqrestore(&n->list_lock, flags);
4059 return count;
4060}
4061
4062static long validate_slab_cache(struct kmem_cache *s)
4063{
4064 int node;
4065 unsigned long count = 0;
4066 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4067 sizeof(unsigned long), GFP_KERNEL);
4068
4069 if (!map)
4070 return -ENOMEM;
4071
4072 flush_all(s);
4073 for_each_node_state(node, N_NORMAL_MEMORY) {
4074 struct kmem_cache_node *n = get_node(s, node);
4075
4076 count += validate_slab_node(s, n, map);
4077 }
4078 kfree(map);
4079 return count;
4080}
4081
4082
4083
4084
4085
4086struct location {
4087 unsigned long count;
4088 unsigned long addr;
4089 long long sum_time;
4090 long min_time;
4091 long max_time;
4092 long min_pid;
4093 long max_pid;
4094 DECLARE_BITMAP(cpus, NR_CPUS);
4095 nodemask_t nodes;
4096};
4097
/*
 * Growable array of struct location, kept sorted by address by
 * add_location().  The member order matters: list_locations() initialises
 * this structure positionally.
 */
struct loc_track {
	unsigned long max;	/* Capacity of loc[] in entries */
	unsigned long count;	/* Entries currently in use */
	struct location *loc;	/* The array itself */
};
4103
4104static void free_loc_track(struct loc_track *t)
4105{
4106 if (t->max)
4107 free_pages((unsigned long)t->loc,
4108 get_order(sizeof(struct location) * t->max));
4109}
4110
4111static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4112{
4113 struct location *l;
4114 int order;
4115
4116 order = get_order(sizeof(struct location) * max);
4117
4118 l = (void *)__get_free_pages(flags, order);
4119 if (!l)
4120 return 0;
4121
4122 if (t->count) {
4123 memcpy(l, t->loc, sizeof(struct location) * t->count);
4124 free_loc_track(t);
4125 }
4126 t->max = max;
4127 t->loc = l;
4128 return 1;
4129}
4130
4131static int add_location(struct loc_track *t, struct kmem_cache *s,
4132 const struct track *track)
4133{
4134 long start, end, pos;
4135 struct location *l;
4136 unsigned long caddr;
4137 unsigned long age = jiffies - track->when;
4138
4139 start = -1;
4140 end = t->count;
4141
4142 for ( ; ; ) {
4143 pos = start + (end - start + 1) / 2;
4144
4145
4146
4147
4148
4149 if (pos == end)
4150 break;
4151
4152 caddr = t->loc[pos].addr;
4153 if (track->addr == caddr) {
4154
4155 l = &t->loc[pos];
4156 l->count++;
4157 if (track->when) {
4158 l->sum_time += age;
4159 if (age < l->min_time)
4160 l->min_time = age;
4161 if (age > l->max_time)
4162 l->max_time = age;
4163
4164 if (track->pid < l->min_pid)
4165 l->min_pid = track->pid;
4166 if (track->pid > l->max_pid)
4167 l->max_pid = track->pid;
4168
4169 cpumask_set_cpu(track->cpu,
4170 to_cpumask(l->cpus));
4171 }
4172 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4173 return 1;
4174 }
4175
4176 if (track->addr < caddr)
4177 end = pos;
4178 else
4179 start = pos;
4180 }
4181
4182
4183
4184
4185 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4186 return 0;
4187
4188 l = t->loc + pos;
4189 if (pos < t->count)
4190 memmove(l + 1, l,
4191 (t->count - pos) * sizeof(struct location));
4192 t->count++;
4193 l->count = 1;
4194 l->addr = track->addr;
4195 l->sum_time = age;
4196 l->min_time = age;
4197 l->max_time = age;
4198 l->min_pid = track->pid;
4199 l->max_pid = track->pid;
4200 cpumask_clear(to_cpumask(l->cpus));
4201 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4202 nodes_clear(l->nodes);
4203 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4204 return 1;
4205}
4206
4207static void process_slab(struct loc_track *t, struct kmem_cache *s,
4208 struct page *page, enum track_item alloc,
4209 unsigned long *map)
4210{
4211 void *addr = page_address(page);
4212 void *p;
4213
4214 bitmap_zero(map, page->objects);
4215 get_map(s, page, map);
4216
4217 for_each_object(p, s, addr, page->objects)
4218 if (!test_bit(slab_index(p, s, addr), map))
4219 add_location(t, s, get_track(s, p, alloc));
4220}
4221
4222static int list_locations(struct kmem_cache *s, char *buf,
4223 enum track_item alloc)
4224{
4225 int len = 0;
4226 unsigned long i;
4227 struct loc_track t = { 0, 0, NULL };
4228 int node;
4229 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4230 sizeof(unsigned long), GFP_KERNEL);
4231
4232 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4233 GFP_TEMPORARY)) {
4234 kfree(map);
4235 return sprintf(buf, "Out of memory\n");
4236 }
4237
4238 flush_all(s);
4239
4240 for_each_node_state(node, N_NORMAL_MEMORY) {
4241 struct kmem_cache_node *n = get_node(s, node);
4242 unsigned long flags;
4243 struct page *page;
4244
4245 if (!atomic_long_read(&n->nr_slabs))
4246 continue;
4247
4248 spin_lock_irqsave(&n->list_lock, flags);
4249 list_for_each_entry(page, &n->partial, lru)
4250 process_slab(&t, s, page, alloc, map);
4251 list_for_each_entry(page, &n->full, lru)
4252 process_slab(&t, s, page, alloc, map);
4253 spin_unlock_irqrestore(&n->list_lock, flags);
4254 }
4255
4256 for (i = 0; i < t.count; i++) {
4257 struct location *l = &t.loc[i];
4258
4259 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4260 break;
4261 len += sprintf(buf + len, "%7ld ", l->count);
4262
4263 if (l->addr)
4264 len += sprintf(buf + len, "%pS", (void *)l->addr);
4265 else
4266 len += sprintf(buf + len, "<not-available>");
4267
4268 if (l->sum_time != l->min_time) {
4269 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4270 l->min_time,
4271 (long)div_u64(l->sum_time, l->count),
4272 l->max_time);
4273 } else
4274 len += sprintf(buf + len, " age=%ld",
4275 l->min_time);
4276
4277 if (l->min_pid != l->max_pid)
4278 len += sprintf(buf + len, " pid=%ld-%ld",
4279 l->min_pid, l->max_pid);
4280 else
4281 len += sprintf(buf + len, " pid=%ld",
4282 l->min_pid);
4283
4284 if (num_online_cpus() > 1 &&
4285 !cpumask_empty(to_cpumask(l->cpus)) &&
4286 len < PAGE_SIZE - 60) {
4287 len += sprintf(buf + len, " cpus=");
4288 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4289 to_cpumask(l->cpus));
4290 }
4291
4292 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4293 len < PAGE_SIZE - 60) {
4294 len += sprintf(buf + len, " nodes=");
4295 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4296 l->nodes);
4297 }
4298
4299 len += sprintf(buf + len, "\n");
4300 }
4301
4302 free_loc_track(&t);
4303 kfree(map);
4304 if (!t.count)
4305 len += sprintf(buf, "No data\n");
4306 return len;
4307}
4308#endif
4309
4310#ifdef SLUB_RESILIENCY_TEST
4311static void resiliency_test(void)
4312{
4313 u8 *p;
4314
4315 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
4316
4317 printk(KERN_ERR "SLUB resiliency testing\n");
4318 printk(KERN_ERR "-----------------------\n");
4319 printk(KERN_ERR "A. Corruption after allocation\n");
4320
4321 p = kzalloc(16, GFP_KERNEL);
4322 p[16] = 0x12;
4323 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4324 " 0x12->0x%p\n\n", p + 16);
4325
4326 validate_slab_cache(kmalloc_caches[4]);
4327
4328
4329 p = kzalloc(32, GFP_KERNEL);
4330 p[32 + sizeof(void *)] = 0x34;
4331 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4332 " 0x34 -> -0x%p\n", p);
4333 printk(KERN_ERR
4334 "If allocated object is overwritten then not detectable\n\n");
4335
4336 validate_slab_cache(kmalloc_caches[5]);
4337 p = kzalloc(64, GFP_KERNEL);
4338 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4339 *p = 0x56;
4340 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4341 p);
4342 printk(KERN_ERR
4343 "If allocated object is overwritten then not detectable\n\n");
4344 validate_slab_cache(kmalloc_caches[6]);
4345
4346 printk(KERN_ERR "\nB. Corruption after free\n");
4347 p = kzalloc(128, GFP_KERNEL);
4348 kfree(p);
4349 *p = 0x78;
4350 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4351 validate_slab_cache(kmalloc_caches[7]);
4352
4353 p = kzalloc(256, GFP_KERNEL);
4354 kfree(p);
4355 p[50] = 0x9a;
4356 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4357 p);
4358 validate_slab_cache(kmalloc_caches[8]);
4359
4360 p = kzalloc(512, GFP_KERNEL);
4361 kfree(p);
4362 p[512] = 0xab;
4363 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4364 validate_slab_cache(kmalloc_caches[9]);
4365}
4366#else
4367#ifdef CONFIG_SYSFS
/* No-op stand-in used when SLUB_RESILIENCY_TEST is not defined. */
static void resiliency_test(void)
{
}
4369#endif
4370#endif
4371
4372#ifdef CONFIG_SYSFS
/*
 * Bit numbers of the selectors understood by show_slab_objects().  The SO_*
 * masks below are what callers actually pass in.
 */
enum slab_stat_type {
	SL_ALL,			/* All slabs */
	SL_PARTIAL,		/* Only slabs on the per node partial lists */
	SL_CPU,			/* Only slabs held by cpus */
	SL_OBJECTS,		/* Count objects in use instead of slabs */
	SL_TOTAL		/* Count all objects instead of slabs */
};

#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
4386
4387static ssize_t show_slab_objects(struct kmem_cache *s,
4388 char *buf, unsigned long flags)
4389{
4390 unsigned long total = 0;
4391 int node;
4392 int x;
4393 unsigned long *nodes;
4394 unsigned long *per_cpu;
4395
4396 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4397 if (!nodes)
4398 return -ENOMEM;
4399 per_cpu = nodes + nr_node_ids;
4400
4401 if (flags & SO_CPU) {
4402 int cpu;
4403
4404 for_each_possible_cpu(cpu) {
4405 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4406 int node;
4407 struct page *page;
4408
4409 page = ACCESS_ONCE(c->page);
4410 if (!page)
4411 continue;
4412
4413 node = page_to_nid(page);
4414 if (flags & SO_TOTAL)
4415 x = page->objects;
4416 else if (flags & SO_OBJECTS)
4417 x = page->inuse;
4418 else
4419 x = 1;
4420
4421 total += x;
4422 nodes[node] += x;
4423
4424 page = ACCESS_ONCE(c->partial);
4425 if (page) {
4426 x = page->pobjects;
4427 total += x;
4428 nodes[node] += x;
4429 }
4430
4431 per_cpu[node]++;
4432 }
4433 }
4434
4435 lock_memory_hotplug();
4436#ifdef CONFIG_SLUB_DEBUG
4437 if (flags & SO_ALL) {
4438 for_each_node_state(node, N_NORMAL_MEMORY) {
4439 struct kmem_cache_node *n = get_node(s, node);
4440
4441 if (flags & SO_TOTAL)
4442 x = atomic_long_read(&n->total_objects);
4443 else if (flags & SO_OBJECTS)
4444 x = atomic_long_read(&n->total_objects) -
4445 count_partial(n, count_free);
4446
4447 else
4448 x = atomic_long_read(&n->nr_slabs);
4449 total += x;
4450 nodes[node] += x;
4451 }
4452
4453 } else
4454#endif
4455 if (flags & SO_PARTIAL) {
4456 for_each_node_state(node, N_NORMAL_MEMORY) {
4457 struct kmem_cache_node *n = get_node(s, node);
4458
4459 if (flags & SO_TOTAL)
4460 x = count_partial(n, count_total);
4461 else if (flags & SO_OBJECTS)
4462 x = count_partial(n, count_inuse);
4463 else
4464 x = n->nr_partial;
4465 total += x;
4466 nodes[node] += x;
4467 }
4468 }
4469 x = sprintf(buf, "%lu", total);
4470#ifdef CONFIG_NUMA
4471 for_each_node_state(node, N_NORMAL_MEMORY)
4472 if (nodes[node])
4473 x += sprintf(buf + x, " N%d=%lu",
4474 node, nodes[node]);
4475#endif
4476 unlock_memory_hotplug();
4477 kfree(nodes);
4478 return x + sprintf(buf + x, "\n");
4479}
4480
4481#ifdef CONFIG_SLUB_DEBUG
4482static int any_slab_objects(struct kmem_cache *s)
4483{
4484 int node;
4485
4486 for_each_online_node(node) {
4487 struct kmem_cache_node *n = get_node(s, node);
4488
4489 if (!n)
4490 continue;
4491
4492 if (atomic_long_read(&n->total_objects))
4493 return 1;
4494 }
4495 return 0;
4496}
4497#endif
4498
4499#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4500#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4501
4502struct slab_attribute {
4503 struct attribute attr;
4504 ssize_t (*show)(struct kmem_cache *s, char *buf);
4505 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4506};
4507
/*
 * Define <name>_attr as a read-only (mode 0400) slab attribute backed by
 * <name>_show().
 */
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0400, _name##_show, NULL)

/*
 * Define <name>_attr as a read-write (mode 0600) slab attribute backed by
 * <name>_show() and <name>_store().
 */
#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr =  \
	__ATTR(_name, 0600, _name##_show, _name##_store)
4515
4516static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4517{
4518 return sprintf(buf, "%d\n", s->size);
4519}
4520SLAB_ATTR_RO(slab_size);
4521
4522static ssize_t align_show(struct kmem_cache *s, char *buf)
4523{
4524 return sprintf(buf, "%d\n", s->align);
4525}
4526SLAB_ATTR_RO(align);
4527
4528static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4529{
4530 return sprintf(buf, "%d\n", s->object_size);
4531}
4532SLAB_ATTR_RO(object_size);
4533
4534static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4535{
4536 return sprintf(buf, "%d\n", oo_objects(s->oo));
4537}
4538SLAB_ATTR_RO(objs_per_slab);
4539
4540static ssize_t order_store(struct kmem_cache *s,
4541 const char *buf, size_t length)
4542{
4543 unsigned long order;
4544 int err;
4545
4546 err = strict_strtoul(buf, 10, &order);
4547 if (err)
4548 return err;
4549
4550 if (order > slub_max_order || order < slub_min_order)
4551 return -EINVAL;
4552
4553 calculate_sizes(s, order);
4554 return length;
4555}
4556
4557static ssize_t order_show(struct kmem_cache *s, char *buf)
4558{
4559 return sprintf(buf, "%d\n", oo_order(s->oo));
4560}
4561SLAB_ATTR(order);
4562
4563static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4564{
4565 return sprintf(buf, "%lu\n", s->min_partial);
4566}
4567
/*
 * "min_partial" write handler: parse @buf as a base-10 unsigned long and
 * hand it to set_min_partial(). Returns the parse error on failure,
 * @length on success.
 */
static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
				 size_t length)
{
	unsigned long value;
	int rc = strict_strtoul(buf, 10, &value);

	if (rc)
		return rc;

	set_min_partial(s, value);

	return length;
}
SLAB_ATTR(min_partial);
4582
4583static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4584{
4585 return sprintf(buf, "%u\n", s->cpu_partial);
4586}
4587
4588static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4589 size_t length)
4590{
4591 unsigned long objects;
4592 int err;
4593
4594 err = strict_strtoul(buf, 10, &objects);
4595 if (err)
4596 return err;
4597 if (objects && kmem_cache_debug(s))
4598 return -EINVAL;
4599
4600 s->cpu_partial = objects;
4601 flush_all(s);
4602 return length;
4603}
4604SLAB_ATTR(cpu_partial);
4605
4606static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4607{
4608 if (!s->ctor)
4609 return 0;
4610 return sprintf(buf, "%pS\n", s->ctor);
4611}
4612SLAB_ATTR_RO(ctor);
4613
4614static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4615{
4616 return sprintf(buf, "%d\n", s->refcount - 1);
4617}
4618SLAB_ATTR_RO(aliases);
4619
4620static ssize_t partial_show(struct kmem_cache *s, char *buf)
4621{
4622 return show_slab_objects(s, buf, SO_PARTIAL);
4623}
4624SLAB_ATTR_RO(partial);
4625
4626static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4627{
4628 return show_slab_objects(s, buf, SO_CPU);
4629}
4630SLAB_ATTR_RO(cpu_slabs);
4631
4632static ssize_t objects_show(struct kmem_cache *s, char *buf)
4633{
4634 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4635}
4636SLAB_ATTR_RO(objects);
4637
4638static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4639{
4640 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4641}
4642SLAB_ATTR_RO(objects_partial);
4643
4644static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4645{
4646 int objects = 0;
4647 int pages = 0;
4648 int cpu;
4649 int len;
4650
4651 for_each_online_cpu(cpu) {
4652 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4653
4654 if (page) {
4655 pages += page->pages;
4656 objects += page->pobjects;
4657 }
4658 }
4659
4660 len = sprintf(buf, "%d(%d)", objects, pages);
4661
4662#ifdef CONFIG_SMP
4663 for_each_online_cpu(cpu) {
4664 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4665
4666 if (page && len < PAGE_SIZE - 20)
4667 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4668 page->pobjects, page->pages);
4669 }
4670#endif
4671 return len + sprintf(buf + len, "\n");
4672}
4673SLAB_ATTR_RO(slabs_cpu_partial);
4674
4675static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4676{
4677 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4678}
4679
4680static ssize_t reclaim_account_store(struct kmem_cache *s,
4681 const char *buf, size_t length)
4682{
4683 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4684 if (buf[0] == '1')
4685 s->flags |= SLAB_RECLAIM_ACCOUNT;
4686 return length;
4687}
4688SLAB_ATTR(reclaim_account);
4689
4690static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4691{
4692 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4693}
4694SLAB_ATTR_RO(hwcache_align);
4695
4696#ifdef CONFIG_ZONE_DMA
4697static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4698{
4699 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4700}
4701SLAB_ATTR_RO(cache_dma);
4702#endif
4703
4704static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4705{
4706 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4707}
4708SLAB_ATTR_RO(destroy_by_rcu);
4709
4710static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4711{
4712 return sprintf(buf, "%d\n", s->reserved);
4713}
4714SLAB_ATTR_RO(reserved);
4715
4716#ifdef CONFIG_SLUB_DEBUG
4717static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4718{
4719 return show_slab_objects(s, buf, SO_ALL);
4720}
4721SLAB_ATTR_RO(slabs);
4722
4723static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4724{
4725 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4726}
4727SLAB_ATTR_RO(total_objects);
4728
4729static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4730{
4731 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4732}
4733
4734static ssize_t sanity_checks_store(struct kmem_cache *s,
4735 const char *buf, size_t length)
4736{
4737 s->flags &= ~SLAB_DEBUG_FREE;
4738 if (buf[0] == '1') {
4739 s->flags &= ~__CMPXCHG_DOUBLE;
4740 s->flags |= SLAB_DEBUG_FREE;
4741 }
4742 return length;
4743}
4744SLAB_ATTR(sanity_checks);
4745
4746static ssize_t trace_show(struct kmem_cache *s, char *buf)
4747{
4748 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4749}
4750
4751static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4752 size_t length)
4753{
4754 s->flags &= ~SLAB_TRACE;
4755 if (buf[0] == '1') {
4756 s->flags &= ~__CMPXCHG_DOUBLE;
4757 s->flags |= SLAB_TRACE;
4758 }
4759 return length;
4760}
4761SLAB_ATTR(trace);
4762
4763static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4764{
4765 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4766}
4767
4768static ssize_t red_zone_store(struct kmem_cache *s,
4769 const char *buf, size_t length)
4770{
4771 if (any_slab_objects(s))
4772 return -EBUSY;
4773
4774 s->flags &= ~SLAB_RED_ZONE;
4775 if (buf[0] == '1') {
4776 s->flags &= ~__CMPXCHG_DOUBLE;
4777 s->flags |= SLAB_RED_ZONE;
4778 }
4779 calculate_sizes(s, -1);
4780 return length;
4781}
4782SLAB_ATTR(red_zone);
4783
4784static ssize_t poison_show(struct kmem_cache *s, char *buf)
4785{
4786 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4787}
4788
4789static ssize_t poison_store(struct kmem_cache *s,
4790 const char *buf, size_t length)
4791{
4792 if (any_slab_objects(s))
4793 return -EBUSY;
4794
4795 s->flags &= ~SLAB_POISON;
4796 if (buf[0] == '1') {
4797 s->flags &= ~__CMPXCHG_DOUBLE;
4798 s->flags |= SLAB_POISON;
4799 }
4800 calculate_sizes(s, -1);
4801 return length;
4802}
4803SLAB_ATTR(poison);
4804
4805static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4806{
4807 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4808}
4809
4810static ssize_t store_user_store(struct kmem_cache *s,
4811 const char *buf, size_t length)
4812{
4813 if (any_slab_objects(s))
4814 return -EBUSY;
4815
4816 s->flags &= ~SLAB_STORE_USER;
4817 if (buf[0] == '1') {
4818 s->flags &= ~__CMPXCHG_DOUBLE;
4819 s->flags |= SLAB_STORE_USER;
4820 }
4821 calculate_sizes(s, -1);
4822 return length;
4823}
4824SLAB_ATTR(store_user);
4825
/*
 * "validate" read handler: the attribute has nothing to report, so no
 * bytes are written to @buf and 0 is returned.
 */
static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
	return 0;
}
4830
4831static ssize_t validate_store(struct kmem_cache *s,
4832 const char *buf, size_t length)
4833{
4834 int ret = -EINVAL;
4835
4836 if (buf[0] == '1') {
4837 ret = validate_slab_cache(s);
4838 if (ret >= 0)
4839 ret = length;
4840 }
4841 return ret;
4842}
4843SLAB_ATTR(validate);
4844
4845static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4846{
4847 if (!(s->flags & SLAB_STORE_USER))
4848 return -ENOSYS;
4849 return list_locations(s, buf, TRACK_ALLOC);
4850}
4851SLAB_ATTR_RO(alloc_calls);
4852
4853static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4854{
4855 if (!(s->flags & SLAB_STORE_USER))
4856 return -ENOSYS;
4857 return list_locations(s, buf, TRACK_FREE);
4858}
4859SLAB_ATTR_RO(free_calls);
4860#endif
4861
4862#ifdef CONFIG_FAILSLAB
4863static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4864{
4865 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4866}
4867
4868static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4869 size_t length)
4870{
4871 s->flags &= ~SLAB_FAILSLAB;
4872 if (buf[0] == '1')
4873 s->flags |= SLAB_FAILSLAB;
4874 return length;
4875}
4876SLAB_ATTR(failslab);
4877#endif
4878
/*
 * "shrink" read handler: the attribute has nothing to report, so no
 * bytes are written to @buf and 0 is returned.
 */
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}
4883
4884static ssize_t shrink_store(struct kmem_cache *s,
4885 const char *buf, size_t length)
4886{
4887 if (buf[0] == '1') {
4888 int rc = kmem_cache_shrink(s);
4889
4890 if (rc)
4891 return rc;
4892 } else
4893 return -EINVAL;
4894 return length;
4895}
4896SLAB_ATTR(shrink);
4897
4898#ifdef CONFIG_NUMA
4899static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4900{
4901 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4902}
4903
4904static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4905 const char *buf, size_t length)
4906{
4907 unsigned long ratio;
4908 int err;
4909
4910 err = strict_strtoul(buf, 10, &ratio);
4911 if (err)
4912 return err;
4913
4914 if (ratio <= 100)
4915 s->remote_node_defrag_ratio = ratio * 10;
4916
4917 return length;
4918}
4919SLAB_ATTR(remote_node_defrag_ratio);
4920#endif
4921
4922#ifdef CONFIG_SLUB_STATS
4923static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4924{
4925 unsigned long sum = 0;
4926 int cpu;
4927 int len;
4928 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4929
4930 if (!data)
4931 return -ENOMEM;
4932
4933 for_each_online_cpu(cpu) {
4934 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4935
4936 data[cpu] = x;
4937 sum += x;
4938 }
4939
4940 len = sprintf(buf, "%lu", sum);
4941
4942#ifdef CONFIG_SMP
4943 for_each_online_cpu(cpu) {
4944 if (data[cpu] && len < PAGE_SIZE - 20)
4945 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4946 }
4947#endif
4948 kfree(data);
4949 return len + sprintf(buf + len, "\n");
4950}
4951
4952static void clear_stat(struct kmem_cache *s, enum stat_item si)
4953{
4954 int cpu;
4955
4956 for_each_online_cpu(cpu)
4957 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
4958}
4959
/*
 * STAT_ATTR(si, text) generates a read/write attribute named <text> for
 * statistic item <si>:
 *   <text>_show()  - formats the counter via show_stat().
 *   <text>_store() - accepts only input starting with '0', which clears
 *                    the counter on all online CPUs via clear_stat();
 *                    anything else yields -EINVAL.
 * The macro body ends with a line continuation, so the blank line that
 * follows it belongs to the definition.
 */
#define STAT_ATTR(si, text) \
static ssize_t text##_show(struct kmem_cache *s, char *buf) \
{ \
	return show_stat(s, buf, si); \
} \
static ssize_t text##_store(struct kmem_cache *s, \
				const char *buf, size_t length) \
{ \
	if (buf[0] != '0') \
		return -EINVAL; \
	clear_stat(s, si); \
	return length; \
} \
SLAB_ATTR(text); \

/* One attribute per statistic item exported through slab_attrs[]. */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5001#endif
5002
/*
 * NULL-terminated list of every attribute attached to a cache through
 * slab_attr_group. Entries inside #ifdef sections exist only when the
 * corresponding config option is enabled. memcg_propagate_slab_attrs()
 * iterates this array by index, including the terminating NULL slot.
 */
static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif

	NULL
};
5075
/* Attribute group wrapping slab_attrs[]; registered by sysfs_slab_add(). */
static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};
5079
5080static ssize_t slab_attr_show(struct kobject *kobj,
5081 struct attribute *attr,
5082 char *buf)
5083{
5084 struct slab_attribute *attribute;
5085 struct kmem_cache *s;
5086 int err;
5087
5088 attribute = to_slab_attr(attr);
5089 s = to_slab(kobj);
5090
5091 if (!attribute->show)
5092 return -EIO;
5093
5094 err = attribute->show(s, buf);
5095
5096 return err;
5097}
5098
5099static ssize_t slab_attr_store(struct kobject *kobj,
5100 struct attribute *attr,
5101 const char *buf, size_t len)
5102{
5103 struct slab_attribute *attribute;
5104 struct kmem_cache *s;
5105 int err;
5106
5107 attribute = to_slab_attr(attr);
5108 s = to_slab(kobj);
5109
5110 if (!attribute->store)
5111 return -EIO;
5112
5113 err = attribute->store(s, buf, len);
5114#ifdef CONFIG_MEMCG_KMEM
5115 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5116 int i;
5117
5118 mutex_lock(&slab_mutex);
5119 if (s->max_attr_size < len)
5120 s->max_attr_size = len;
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139 for_each_memcg_cache_index(i) {
5140 struct kmem_cache *c = cache_from_memcg(s, i);
5141 if (c)
5142 attribute->store(c, buf, len);
5143 }
5144 mutex_unlock(&slab_mutex);
5145 }
5146#endif
5147 return err;
5148}
5149
/*
 * Initialise the writable attributes of a per-memcg cache @s from its
 * root cache.
 *
 * For every attribute that has both a show and a store callback, the
 * root cache's current value is formatted with show() and replayed on @s
 * with store(). Nothing happens when @s is itself a root cache, or when
 * no attribute was ever written on the root cache (max_attr_size == 0),
 * since then all values are still the defaults.
 *
 * Without CONFIG_MEMCG_KMEM this function is empty.
 */
static void memcg_propagate_slab_attrs(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG_KMEM
	int i;
	char *buffer = NULL;
	struct kmem_cache *root_cache;

	/*
	 * Only per-memcg caches have a root to copy from. The test used to
	 * be inverted, which skipped every memcg cache and dereferenced
	 * memcg_params->root_cache on root caches instead.
	 */
	if (is_root_cache(s))
		return;

	root_cache = s->memcg_params->root_cache;

	/* max_attr_size is only maintained on root caches. */
	if (!root_cache->max_attr_size)
		return;

	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
		char mbuf[64];
		char *buf;
		ssize_t len;
		struct slab_attribute *attr;

		/* The array is NULL-terminated and the loop covers that slot. */
		if (!slab_attrs[i])
			continue;

		attr = to_slab_attr(slab_attrs[i]);
		if (!attr->store || !attr->show)
			continue;

		/*
		 * Use the small on-stack buffer only while nothing larger
		 * can be produced: statistics attributes emit one entry per
		 * CPU and can exceed it regardless of what was written, so
		 * always take a full page when they are compiled in. Once a
		 * page has been allocated it is reused for the rest of the
		 * loop.
		 */
		if (buffer)
			buf = buffer;
		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
			 !IS_ENABLED(CONFIG_SLUB_STATS))
			buf = mbuf;
		else {
			buffer = (char *) get_zeroed_page(GFP_KERNEL);
			if (WARN_ON(!buffer))
				continue;
			buf = buffer;
		}

		/*
		 * Some show callbacks write nothing and return 0 (or return
		 * an error); the buffer is then not a valid string, so rely
		 * on the returned length rather than strlen().
		 */
		len = attr->show(root_cache, buf);
		if (len > 0)
			attr->store(s, buf, len);
	}

	if (buffer)
		free_page((unsigned long)buffer);
#endif
}
5202
/* sysfs operations for slab kobjects: route reads/writes to the dispatchers above. */
static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};
5207
/* kobject type of every cache's kobject; uevent_filter() matches on it. */
static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
};
5211
5212static int uevent_filter(struct kset *kset, struct kobject *kobj)
5213{
5214 struct kobj_type *ktype = get_ktype(kobj);
5215
5216 if (ktype == &slab_ktype)
5217 return 1;
5218 return 0;
5219}
5220
/* uevent operations passed to kset_create_and_add() in slab_sysfs_init(). */
static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};
5224
/* The "slab" kset; created in slab_sysfs_init(), parent of all cache kobjects. */
static struct kset *slab_kset;

/* Size in bytes of the buffer create_unique_id() allocates for an id string. */
#define ID_STR_LENGTH 64
5228
5229
5230
5231
5232
5233static char *create_unique_id(struct kmem_cache *s)
5234{
5235 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5236 char *p = name;
5237
5238 BUG_ON(!name);
5239
5240 *p++ = ':';
5241
5242
5243
5244
5245
5246
5247
5248 if (s->flags & SLAB_CACHE_DMA)
5249 *p++ = 'd';
5250 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5251 *p++ = 'a';
5252 if (s->flags & SLAB_DEBUG_FREE)
5253 *p++ = 'F';
5254 if (!(s->flags & SLAB_NOTRACK))
5255 *p++ = 't';
5256 if (p != name + 1)
5257 *p++ = '-';
5258 p += sprintf(p, "%07d", s->size);
5259
5260#ifdef CONFIG_MEMCG_KMEM
5261 if (!is_root_cache(s))
5262 p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
5263#endif
5264
5265 BUG_ON(p > name + ID_STR_LENGTH - 1);
5266 return name;
5267}
5268
5269static int sysfs_slab_add(struct kmem_cache *s)
5270{
5271 int err;
5272 const char *name;
5273 int unmergeable = slab_unmergeable(s);
5274
5275 if (unmergeable) {
5276
5277
5278
5279
5280
5281 sysfs_remove_link(&slab_kset->kobj, s->name);
5282 name = s->name;
5283 } else {
5284
5285
5286
5287
5288 name = create_unique_id(s);
5289 }
5290
5291 s->kobj.kset = slab_kset;
5292 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5293 if (err) {
5294 kobject_put(&s->kobj);
5295 return err;
5296 }
5297
5298 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5299 if (err) {
5300 kobject_del(&s->kobj);
5301 kobject_put(&s->kobj);
5302 return err;
5303 }
5304 kobject_uevent(&s->kobj, KOBJ_ADD);
5305 if (!unmergeable) {
5306
5307 sysfs_slab_alias(s, s->name);
5308 kfree(name);
5309 }
5310 return 0;
5311}
5312
5313static void sysfs_slab_remove(struct kmem_cache *s)
5314{
5315 if (slab_state < FULL)
5316
5317
5318
5319
5320 return;
5321
5322 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5323 kobject_del(&s->kobj);
5324 kobject_put(&s->kobj);
5325}
5326
5327
5328
5329
5330
/*
 * An alias request recorded by sysfs_slab_alias() while slab_state was
 * not yet FULL; slab_sysfs_init() replays and frees these entries.
 */
struct saved_alias {
	struct kmem_cache *s;		/* cache the alias refers to */
	const char *name;		/* alias name (pointer is stored, not copied) */
	struct saved_alias *next;	/* next entry in the singly linked list */
};

/* Head of the list of pending aliases; new entries are pushed at the front. */
static struct saved_alias *alias_list;
5338
5339static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5340{
5341 struct saved_alias *al;
5342
5343 if (slab_state == FULL) {
5344
5345
5346
5347 sysfs_remove_link(&slab_kset->kobj, name);
5348 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5349 }
5350
5351 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5352 if (!al)
5353 return -ENOMEM;
5354
5355 al->s = s;
5356 al->name = name;
5357 al->next = alias_list;
5358 alias_list = al;
5359 return 0;
5360}
5361
5362static int __init slab_sysfs_init(void)
5363{
5364 struct kmem_cache *s;
5365 int err;
5366
5367 mutex_lock(&slab_mutex);
5368
5369 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5370 if (!slab_kset) {
5371 mutex_unlock(&slab_mutex);
5372 printk(KERN_ERR "Cannot register slab subsystem.\n");
5373 return -ENOSYS;
5374 }
5375
5376 slab_state = FULL;
5377
5378 list_for_each_entry(s, &slab_caches, list) {
5379 err = sysfs_slab_add(s);
5380 if (err)
5381 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5382 " to sysfs\n", s->name);
5383 }
5384
5385 while (alias_list) {
5386 struct saved_alias *al = alias_list;
5387
5388 alias_list = alias_list->next;
5389 err = sysfs_slab_alias(al->s, al->name);
5390 if (err)
5391 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5392 " %s to sysfs\n", al->name);
5393 kfree(al);
5394 }
5395
5396 mutex_unlock(&slab_mutex);
5397 resiliency_test();
5398 return 0;
5399}
5400
5401__initcall(slab_sysfs_init);
5402#endif
5403
5404
5405
5406
5407#ifdef CONFIG_SLABINFO
5408void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5409{
5410 unsigned long nr_partials = 0;
5411 unsigned long nr_slabs = 0;
5412 unsigned long nr_objs = 0;
5413 unsigned long nr_free = 0;
5414 int node;
5415
5416 for_each_online_node(node) {
5417 struct kmem_cache_node *n = get_node(s, node);
5418
5419 if (!n)
5420 continue;
5421
5422 nr_partials += n->nr_partial;
5423 nr_slabs += atomic_long_read(&n->nr_slabs);
5424 nr_objs += atomic_long_read(&n->total_objects);
5425 nr_free += count_partial(n, count_free);
5426 }
5427
5428 sinfo->active_objs = nr_objs - nr_free;
5429 sinfo->num_objs = nr_objs;
5430 sinfo->active_slabs = nr_slabs;
5431 sinfo->num_slabs = nr_slabs;
5432 sinfo->objects_per_slab = oo_objects(s->oo);
5433 sinfo->cache_order = oo_order(s->oo);
5434}
5435
/* Intentionally empty: this implementation emits no extra per-cache statistics. */
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}
5439
/* Writes are not supported by this implementation: always fails with -EIO. */
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
5445#endif
5446