1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/kmemcheck.h>
22#include <linux/cpu.h>
23#include <linux/cpuset.h>
24#include <linux/mempolicy.h>
25#include <linux/ctype.h>
26#include <linux/debugobjects.h>
27#include <linux/kallsyms.h>
28#include <linux/memory.h>
29#include <linux/math64.h>
30#include <linux/fault-inject.h>
31#include <linux/stacktrace.h>
32#include <linux/prefetch.h>
33
34#include <trace/events/kmem.h>
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
 * Mask of the per-cache debugging flags.  kmem_cache_debug() tests a
 * cache's flags against this mask.
 */
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DEBUG_FREE)
114
/*
 * Tell whether any flag from SLAB_DEBUG_FLAGS is set on cache @s.
 * Without CONFIG_SLUB_DEBUG the answer is always 0.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
	int debug = 0;

#ifdef CONFIG_SLUB_DEBUG
	debug = unlikely(s->flags & SLAB_DEBUG_FLAGS);
#endif
	return debug;
}
123
124
125
126
127
128
129
130
131
132
/* NOTE(review): boot-time self-test switch; its users are outside this excerpt. */
#undef SLUB_RESILIENCY_TEST

/* Define to log every failed double-word cmpxchg (see the cmpxchg helpers). */
#undef SLUB_DEBUG_CMPXCHG

/*
 * NOTE(review): presumably the minimum number of partial slabs kept per
 * node; the users are outside this excerpt -- confirm.
 */
#define MIN_PARTIAL 5

/*
 * NOTE(review): presumably the upper bound of desirable partial slabs per
 * node; the users are outside this excerpt -- confirm.
 */
#define MAX_PARTIAL 10

/* Debug flags used as the default for slub_debug (see setup_slub_debug()). */
#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * NOTE(review): looks like the subset of debug flags that need extra
 * per-object metadata; users are outside this excerpt -- confirm.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/* NOTE(review): presumably flags that prevent merging of caches -- confirm. */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

/* NOTE(review): presumably flags that must match for caches to merge -- confirm. */
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

/*
 * Encoding of struct kmem_cache_order_objects: the page order lives above
 * OO_SHIFT, the object count in the low OO_MASK bits (see oo_make()).
 */
#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
/* Cap applied to the expected per-slab object count in on_freelist(). */
#define MAX_OBJS_PER_PAGE	32767

/* Internal bits stored in kmem_cache->flags */
#define __OBJECT_POISON		0x80000000UL /* init_object() poisons the object */
#define __CMPXCHG_DOUBLE	0x40000000UL /* use cmpxchg_double() on the page */
178
/* Starts out as sizeof(struct kmem_cache); users are outside this excerpt. */
static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

/*
 * Bring-up state of the allocator.  The enumerators are ordered, and
 * slab_is_available() reports true once the state is UP or later.
 */
static enum {
	DOWN,
	PARTIAL,
	UP,
	SYSFS
} slab_state = DOWN;

/* NOTE(review): slub_lock presumably protects the slab_caches list -- confirm. */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);
195
196
197
198
/*
 * Per-object record of one alloc or free event, filled in by set_track().
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Caller address passed to set_track() */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Saved stack trace, zero padded */
#endif
	int cpu;		/* smp_processor_id() at record time */
	int pid;		/* current->pid at record time */
	unsigned long when;	/* jiffies at record time */
};

/* Index of the record within the pair of struct track kept per object. */
enum track_item { TRACK_ALLOC, TRACK_FREE };
211
#ifdef CONFIG_SYSFS
/* Real implementations live further down in the file. */
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
/* Without sysfs, adding and aliasing are no-ops that report success. */
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
/* Without sysfs, removal frees the cache name and the cache itself. */
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s->name);
	kfree(s);
}

#endif
228
/*
 * Bump the per-cpu statistics counter @si of cache @s.
 * Compiles to nothing unless CONFIG_SLUB_STATS is set.
 */
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);
#endif
}
235
236
237
238
239
240int slab_is_available(void)
241{
242 return slab_state >= UP;
243}
244
245static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
246{
247 return s->node[node];
248}
249
250
251static inline int check_valid_pointer(struct kmem_cache *s,
252 struct page *page, const void *object)
253{
254 void *base;
255
256 if (!object)
257 return 1;
258
259 base = page_address(page);
260 if (object < base || object >= base + page->objects * s->size ||
261 (object - base) % s->size) {
262 return 0;
263 }
264
265 return 1;
266}
267
268static inline void *get_freepointer(struct kmem_cache *s, void *object)
269{
270 return *(void **)(object + s->offset);
271}
272
273static void prefetch_freepointer(const struct kmem_cache *s, void *object)
274{
275 prefetch(object + s->offset);
276}
277
/*
 * Like get_freepointer(), but with CONFIG_DEBUG_PAGEALLOC the read goes
 * through probe_kernel_read() instead of a plain dereference.  The result
 * of probe_kernel_read() is not checked.
 */
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	void *next;

	probe_kernel_read(&next, (void **)(object + s->offset), sizeof(next));
	return next;
#else
	return get_freepointer(s, object);
#endif
}
289
290static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
291{
292 *(void **)(object + s->offset) = fp;
293}
294
295
/*
 * Loop over all objects in a slab: __p walks from __addr in steps of
 * (__s)->size for __objects slots.  __addr and __objects are evaluated on
 * every iteration.
 */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)
299
300
301static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
302{
303 return (p - addr) / s->size;
304}
305
306static inline size_t slab_ksize(const struct kmem_cache *s)
307{
308#ifdef CONFIG_SLUB_DEBUG
309
310
311
312
313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
314 return s->objsize;
315
316#endif
317
318
319
320
321
322 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
323 return s->inuse;
324
325
326
327 return s->size;
328}
329
330static inline int order_objects(int order, unsigned long size, int reserved)
331{
332 return ((PAGE_SIZE << order) - reserved) / size;
333}
334
335static inline struct kmem_cache_order_objects oo_make(int order,
336 unsigned long size, int reserved)
337{
338 struct kmem_cache_order_objects x = {
339 (order << OO_SHIFT) + order_objects(order, size, reserved)
340 };
341
342 return x;
343}
344
345static inline int oo_order(struct kmem_cache_order_objects x)
346{
347 return x.x >> OO_SHIFT;
348}
349
350static inline int oo_objects(struct kmem_cache_order_objects x)
351{
352 return x.x & OO_MASK;
353}
354
355
356
357
358static __always_inline void slab_lock(struct page *page)
359{
360 bit_spin_lock(PG_locked, &page->flags);
361}
362
363static __always_inline void slab_unlock(struct page *page)
364{
365 __bit_spin_unlock(PG_locked, &page->flags);
366}
367
368
369static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
370 void *freelist_old, unsigned long counters_old,
371 void *freelist_new, unsigned long counters_new,
372 const char *n)
373{
374 VM_BUG_ON(!irqs_disabled());
375#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
376 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
377 if (s->flags & __CMPXCHG_DOUBLE) {
378 if (cmpxchg_double(&page->freelist, &page->counters,
379 freelist_old, counters_old,
380 freelist_new, counters_new))
381 return 1;
382 } else
383#endif
384 {
385 slab_lock(page);
386 if (page->freelist == freelist_old && page->counters == counters_old) {
387 page->freelist = freelist_new;
388 page->counters = counters_new;
389 slab_unlock(page);
390 return 1;
391 }
392 slab_unlock(page);
393 }
394
395 cpu_relax();
396 stat(s, CMPXCHG_DOUBLE_FAIL);
397
398#ifdef SLUB_DEBUG_CMPXCHG
399 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
400#endif
401
402 return 0;
403}
404
405static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
406 void *freelist_old, unsigned long counters_old,
407 void *freelist_new, unsigned long counters_new,
408 const char *n)
409{
410#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
411 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
412 if (s->flags & __CMPXCHG_DOUBLE) {
413 if (cmpxchg_double(&page->freelist, &page->counters,
414 freelist_old, counters_old,
415 freelist_new, counters_new))
416 return 1;
417 } else
418#endif
419 {
420 unsigned long flags;
421
422 local_irq_save(flags);
423 slab_lock(page);
424 if (page->freelist == freelist_old && page->counters == counters_old) {
425 page->freelist = freelist_new;
426 page->counters = counters_new;
427 slab_unlock(page);
428 local_irq_restore(flags);
429 return 1;
430 }
431 slab_unlock(page);
432 local_irq_restore(flags);
433 }
434
435 cpu_relax();
436 stat(s, CMPXCHG_DOUBLE_FAIL);
437
438#ifdef SLUB_DEBUG_CMPXCHG
439 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
440#endif
441
442 return 0;
443}
444
445#ifdef CONFIG_SLUB_DEBUG
446
447
448
449
450
451
452static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
453{
454 void *p;
455 void *addr = page_address(page);
456
457 for (p = page->freelist; p; p = get_freepointer(s, p))
458 set_bit(slab_index(p, s, addr), map);
459}
460
461
462
463
/*
 * Debug flags OR-ed into a cache's flags by kmem_cache_flags().  Starts
 * at DEBUG_DEFAULT_FLAGS when CONFIG_SLUB_DEBUG_ON is set, otherwise 0;
 * may be changed by setup_slub_debug().
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

/* Optional cache-name prefix that restricts which caches get slub_debug. */
static char *slub_debug_slabs;
/* Set by the 'o' option of setup_slub_debug(); users are outside this excerpt. */
static int disable_higher_order_debug;
472
473
474
475
476static void print_section(char *text, u8 *addr, unsigned int length)
477{
478 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
479 length, 1);
480}
481
482static struct track *get_track(struct kmem_cache *s, void *object,
483 enum track_item alloc)
484{
485 struct track *p;
486
487 if (s->offset)
488 p = object + s->offset + sizeof(void *);
489 else
490 p = object + s->inuse;
491
492 return p + alloc;
493}
494
495static void set_track(struct kmem_cache *s, void *object,
496 enum track_item alloc, unsigned long addr)
497{
498 struct track *p = get_track(s, object, alloc);
499
500 if (addr) {
501#ifdef CONFIG_STACKTRACE
502 struct stack_trace trace;
503 int i;
504
505 trace.nr_entries = 0;
506 trace.max_entries = TRACK_ADDRS_COUNT;
507 trace.entries = p->addrs;
508 trace.skip = 3;
509 save_stack_trace(&trace);
510
511
512 if (trace.nr_entries != 0 &&
513 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
514 trace.nr_entries--;
515
516 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
517 p->addrs[i] = 0;
518#endif
519 p->addr = addr;
520 p->cpu = smp_processor_id();
521 p->pid = current->pid;
522 p->when = jiffies;
523 } else
524 memset(p, 0, sizeof(struct track));
525}
526
527static void init_tracking(struct kmem_cache *s, void *object)
528{
529 if (!(s->flags & SLAB_STORE_USER))
530 return;
531
532 set_track(s, object, TRACK_FREE, 0UL);
533 set_track(s, object, TRACK_ALLOC, 0UL);
534}
535
536static void print_track(const char *s, struct track *t)
537{
538 if (!t->addr)
539 return;
540
541 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
542 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
543#ifdef CONFIG_STACKTRACE
544 {
545 int i;
546 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
547 if (t->addrs[i])
548 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
549 else
550 break;
551 }
552#endif
553}
554
555static void print_tracking(struct kmem_cache *s, void *object)
556{
557 if (!(s->flags & SLAB_STORE_USER))
558 return;
559
560 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
561 print_track("Freed", get_track(s, object, TRACK_FREE));
562}
563
564static void print_page_info(struct page *page)
565{
566 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
567 page, page->objects, page->inuse, page->freelist, page->flags);
568
569}
570
571static void slab_bug(struct kmem_cache *s, char *fmt, ...)
572{
573 va_list args;
574 char buf[100];
575
576 va_start(args, fmt);
577 vsnprintf(buf, sizeof(buf), fmt, args);
578 va_end(args);
579 printk(KERN_ERR "========================================"
580 "=====================================\n");
581 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
582 printk(KERN_ERR "----------------------------------------"
583 "-------------------------------------\n\n");
584}
585
586static void slab_fix(struct kmem_cache *s, char *fmt, ...)
587{
588 va_list args;
589 char buf[100];
590
591 va_start(args, fmt);
592 vsnprintf(buf, sizeof(buf), fmt, args);
593 va_end(args);
594 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
595}
596
597static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
598{
599 unsigned int off;
600 u8 *addr = page_address(page);
601
602 print_tracking(s, p);
603
604 print_page_info(page);
605
606 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
607 p, p - addr, get_freepointer(s, p));
608
609 if (p > addr + 16)
610 print_section("Bytes b4 ", p - 16, 16);
611
612 print_section("Object ", p, min_t(unsigned long, s->objsize,
613 PAGE_SIZE));
614 if (s->flags & SLAB_RED_ZONE)
615 print_section("Redzone ", p + s->objsize,
616 s->inuse - s->objsize);
617
618 if (s->offset)
619 off = s->offset + sizeof(void *);
620 else
621 off = s->inuse;
622
623 if (s->flags & SLAB_STORE_USER)
624 off += 2 * sizeof(struct track);
625
626 if (off != s->size)
627
628 print_section("Padding ", p + off, s->size - off);
629
630 dump_stack();
631}
632
633static void object_err(struct kmem_cache *s, struct page *page,
634 u8 *object, char *reason)
635{
636 slab_bug(s, "%s", reason);
637 print_trailer(s, page, object);
638}
639
/*
 * Report a problem with a whole slab: bug banner with the formatted
 * message (truncated to 100 bytes), page info and a stack dump.
 */
static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	char msg[100];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);

	slab_bug(s, "%s", msg);
	print_page_info(page);
	dump_stack();
}
652
653static void init_object(struct kmem_cache *s, void *object, u8 val)
654{
655 u8 *p = object;
656
657 if (s->flags & __OBJECT_POISON) {
658 memset(p, POISON_FREE, s->objsize - 1);
659 p[s->objsize - 1] = POISON_END;
660 }
661
662 if (s->flags & SLAB_RED_ZONE)
663 memset(p + s->objsize, val, s->inuse - s->objsize);
664}
665
666static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
667 void *from, void *to)
668{
669 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
670 memset(from, data, to - from);
671}
672
673static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
674 u8 *object, char *what,
675 u8 *start, unsigned int value, unsigned int bytes)
676{
677 u8 *fault;
678 u8 *end;
679
680 fault = memchr_inv(start, value, bytes);
681 if (!fault)
682 return 1;
683
684 end = start + bytes;
685 while (end > fault && end[-1] == value)
686 end--;
687
688 slab_bug(s, "%s overwritten", what);
689 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
690 fault, end - 1, fault[0], value);
691 print_trailer(s, page, object);
692
693 restore_bytes(s, what, value, fault, end);
694 return 0;
695}
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
736{
737 unsigned long off = s->inuse;
738
739 if (s->offset)
740
741 off += sizeof(void *);
742
743 if (s->flags & SLAB_STORE_USER)
744
745 off += 2 * sizeof(struct track);
746
747 if (s->size == off)
748 return 1;
749
750 return check_bytes_and_report(s, page, p, "Object padding",
751 p + off, POISON_INUSE, s->size - off);
752}
753
754
755static int slab_pad_check(struct kmem_cache *s, struct page *page)
756{
757 u8 *start;
758 u8 *fault;
759 u8 *end;
760 int length;
761 int remainder;
762
763 if (!(s->flags & SLAB_POISON))
764 return 1;
765
766 start = page_address(page);
767 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
768 end = start + length;
769 remainder = length % s->size;
770 if (!remainder)
771 return 1;
772
773 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
774 if (!fault)
775 return 1;
776 while (end > fault && end[-1] == POISON_INUSE)
777 end--;
778
779 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
780 print_section("Padding ", end - remainder, remainder);
781
782 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
783 return 0;
784}
785
786static int check_object(struct kmem_cache *s, struct page *page,
787 void *object, u8 val)
788{
789 u8 *p = object;
790 u8 *endobject = object + s->objsize;
791
792 if (s->flags & SLAB_RED_ZONE) {
793 if (!check_bytes_and_report(s, page, object, "Redzone",
794 endobject, val, s->inuse - s->objsize))
795 return 0;
796 } else {
797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
798 check_bytes_and_report(s, page, p, "Alignment padding",
799 endobject, POISON_INUSE, s->inuse - s->objsize);
800 }
801 }
802
803 if (s->flags & SLAB_POISON) {
804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
805 (!check_bytes_and_report(s, page, p, "Poison", p,
806 POISON_FREE, s->objsize - 1) ||
807 !check_bytes_and_report(s, page, p, "Poison",
808 p + s->objsize - 1, POISON_END, 1)))
809 return 0;
810
811
812
813 check_pad_bytes(s, page, p);
814 }
815
816 if (!s->offset && val == SLUB_RED_ACTIVE)
817
818
819
820
821 return 1;
822
823
824 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
825 object_err(s, page, p, "Freepointer corrupt");
826
827
828
829
830
831 set_freepointer(s, p, NULL);
832 return 0;
833 }
834 return 1;
835}
836
837static int check_slab(struct kmem_cache *s, struct page *page)
838{
839 int maxobj;
840
841 VM_BUG_ON(!irqs_disabled());
842
843 if (!PageSlab(page)) {
844 slab_err(s, page, "Not a valid slab page");
845 return 0;
846 }
847
848 maxobj = order_objects(compound_order(page), s->size, s->reserved);
849 if (page->objects > maxobj) {
850 slab_err(s, page, "objects %u > max %u",
851 s->name, page->objects, maxobj);
852 return 0;
853 }
854 if (page->inuse > page->objects) {
855 slab_err(s, page, "inuse %u > max %u",
856 s->name, page->inuse, page->objects);
857 return 0;
858 }
859
860 slab_pad_check(s, page);
861 return 1;
862}
863
864
865
866
867
868static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
869{
870 int nr = 0;
871 void *fp;
872 void *object = NULL;
873 unsigned long max_objects;
874
875 fp = page->freelist;
876 while (fp && nr <= page->objects) {
877 if (fp == search)
878 return 1;
879 if (!check_valid_pointer(s, page, fp)) {
880 if (object) {
881 object_err(s, page, object,
882 "Freechain corrupt");
883 set_freepointer(s, object, NULL);
884 break;
885 } else {
886 slab_err(s, page, "Freepointer corrupt");
887 page->freelist = NULL;
888 page->inuse = page->objects;
889 slab_fix(s, "Freelist cleared");
890 return 0;
891 }
892 break;
893 }
894 object = fp;
895 fp = get_freepointer(s, object);
896 nr++;
897 }
898
899 max_objects = order_objects(compound_order(page), s->size, s->reserved);
900 if (max_objects > MAX_OBJS_PER_PAGE)
901 max_objects = MAX_OBJS_PER_PAGE;
902
903 if (page->objects != max_objects) {
904 slab_err(s, page, "Wrong number of objects. Found %d but "
905 "should be %d", page->objects, max_objects);
906 page->objects = max_objects;
907 slab_fix(s, "Number of objects adjusted.");
908 }
909 if (page->inuse != page->objects - nr) {
910 slab_err(s, page, "Wrong object count. Counter is %d but "
911 "counted were %d", page->inuse, page->objects - nr);
912 page->inuse = page->objects - nr;
913 slab_fix(s, "Object count adjusted.");
914 }
915 return search == NULL;
916}
917
918static void trace(struct kmem_cache *s, struct page *page, void *object,
919 int alloc)
920{
921 if (s->flags & SLAB_TRACE) {
922 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
923 s->name,
924 alloc ? "alloc" : "free",
925 object, page->inuse,
926 page->freelist);
927
928 if (!alloc)
929 print_section("Object ", (void *)object, s->objsize);
930
931 dump_stack();
932 }
933}
934
935
936
937
938
939static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
940{
941 flags &= gfp_allowed_mask;
942 lockdep_trace_alloc(flags);
943 might_sleep_if(flags & __GFP_WAIT);
944
945 return should_failslab(s->objsize, flags, s->flags);
946}
947
948static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
949{
950 flags &= gfp_allowed_mask;
951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
953}
954
955static inline void slab_free_hook(struct kmem_cache *s, void *x)
956{
957 kmemleak_free_recursive(x, s->flags);
958
959
960
961
962
963
964#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
965 {
966 unsigned long flags;
967
968 local_irq_save(flags);
969 kmemcheck_slab_free(s, x, s->objsize);
970 debug_check_no_locks_freed(x, s->objsize);
971 local_irq_restore(flags);
972 }
973#endif
974 if (!(s->flags & SLAB_DEBUG_OBJECTS))
975 debug_check_no_obj_freed(x, s->objsize);
976}
977
978
979
980
981
982
983static void add_full(struct kmem_cache *s,
984 struct kmem_cache_node *n, struct page *page)
985{
986 if (!(s->flags & SLAB_STORE_USER))
987 return;
988
989 list_add(&page->lru, &n->full);
990}
991
992
993
994
995static void remove_full(struct kmem_cache *s, struct page *page)
996{
997 if (!(s->flags & SLAB_STORE_USER))
998 return;
999
1000 list_del(&page->lru);
1001}
1002
1003
1004static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1005{
1006 struct kmem_cache_node *n = get_node(s, node);
1007
1008 return atomic_long_read(&n->nr_slabs);
1009}
1010
1011static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1012{
1013 return atomic_long_read(&n->nr_slabs);
1014}
1015
1016static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1017{
1018 struct kmem_cache_node *n = get_node(s, node);
1019
1020
1021
1022
1023
1024
1025
1026 if (n) {
1027 atomic_long_inc(&n->nr_slabs);
1028 atomic_long_add(objects, &n->total_objects);
1029 }
1030}
1031static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1032{
1033 struct kmem_cache_node *n = get_node(s, node);
1034
1035 atomic_long_dec(&n->nr_slabs);
1036 atomic_long_sub(objects, &n->total_objects);
1037}
1038
1039
1040static void setup_object_debug(struct kmem_cache *s, struct page *page,
1041 void *object)
1042{
1043 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1044 return;
1045
1046 init_object(s, object, SLUB_RED_INACTIVE);
1047 init_tracking(s, object);
1048}
1049
1050static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1051 void *object, unsigned long addr)
1052{
1053 if (!check_slab(s, page))
1054 goto bad;
1055
1056 if (!check_valid_pointer(s, page, object)) {
1057 object_err(s, page, object, "Freelist Pointer check fails");
1058 goto bad;
1059 }
1060
1061 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1062 goto bad;
1063
1064
1065 if (s->flags & SLAB_STORE_USER)
1066 set_track(s, object, TRACK_ALLOC, addr);
1067 trace(s, page, object, 1);
1068 init_object(s, object, SLUB_RED_ACTIVE);
1069 return 1;
1070
1071bad:
1072 if (PageSlab(page)) {
1073
1074
1075
1076
1077
1078 slab_fix(s, "Marking all objects used");
1079 page->inuse = page->objects;
1080 page->freelist = NULL;
1081 }
1082 return 0;
1083}
1084
1085static noinline int free_debug_processing(struct kmem_cache *s,
1086 struct page *page, void *object, unsigned long addr)
1087{
1088 unsigned long flags;
1089 int rc = 0;
1090
1091 local_irq_save(flags);
1092 slab_lock(page);
1093
1094 if (!check_slab(s, page))
1095 goto fail;
1096
1097 if (!check_valid_pointer(s, page, object)) {
1098 slab_err(s, page, "Invalid object pointer 0x%p", object);
1099 goto fail;
1100 }
1101
1102 if (on_freelist(s, page, object)) {
1103 object_err(s, page, object, "Object already free");
1104 goto fail;
1105 }
1106
1107 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1108 goto out;
1109
1110 if (unlikely(s != page->slab)) {
1111 if (!PageSlab(page)) {
1112 slab_err(s, page, "Attempt to free object(0x%p) "
1113 "outside of slab", object);
1114 } else if (!page->slab) {
1115 printk(KERN_ERR
1116 "SLUB <none>: no slab for object 0x%p.\n",
1117 object);
1118 dump_stack();
1119 } else
1120 object_err(s, page, object,
1121 "page slab pointer corrupt.");
1122 goto fail;
1123 }
1124
1125 if (s->flags & SLAB_STORE_USER)
1126 set_track(s, object, TRACK_FREE, addr);
1127 trace(s, page, object, 0);
1128 init_object(s, object, SLUB_RED_INACTIVE);
1129 rc = 1;
1130out:
1131 slab_unlock(page);
1132 local_irq_restore(flags);
1133 return rc;
1134
1135fail:
1136 slab_fix(s, "Object at 0x%p not freed", object);
1137 goto out;
1138}
1139
1140static int __init setup_slub_debug(char *str)
1141{
1142 slub_debug = DEBUG_DEFAULT_FLAGS;
1143 if (*str++ != '=' || !*str)
1144
1145
1146
1147 goto out;
1148
1149 if (*str == ',')
1150
1151
1152
1153
1154 goto check_slabs;
1155
1156 if (tolower(*str) == 'o') {
1157
1158
1159
1160
1161 disable_higher_order_debug = 1;
1162 goto out;
1163 }
1164
1165 slub_debug = 0;
1166 if (*str == '-')
1167
1168
1169
1170 goto out;
1171
1172
1173
1174
1175 for (; *str && *str != ','; str++) {
1176 switch (tolower(*str)) {
1177 case 'f':
1178 slub_debug |= SLAB_DEBUG_FREE;
1179 break;
1180 case 'z':
1181 slub_debug |= SLAB_RED_ZONE;
1182 break;
1183 case 'p':
1184 slub_debug |= SLAB_POISON;
1185 break;
1186 case 'u':
1187 slub_debug |= SLAB_STORE_USER;
1188 break;
1189 case 't':
1190 slub_debug |= SLAB_TRACE;
1191 break;
1192 case 'a':
1193 slub_debug |= SLAB_FAILSLAB;
1194 break;
1195 default:
1196 printk(KERN_ERR "slub_debug option '%c' "
1197 "unknown. skipped\n", *str);
1198 }
1199 }
1200
1201check_slabs:
1202 if (*str == ',')
1203 slub_debug_slabs = str + 1;
1204out:
1205 return 1;
1206}
1207
1208__setup("slub_debug", setup_slub_debug);
1209
1210static unsigned long kmem_cache_flags(unsigned long objsize,
1211 unsigned long flags, const char *name,
1212 void (*ctor)(void *))
1213{
1214
1215
1216
1217 if (slub_debug && (!slub_debug_slabs ||
1218 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1219 flags |= slub_debug;
1220
1221 return flags;
1222}
1223#else
/*
 * Stubs used when CONFIG_SLUB_DEBUG is not set: the debug hooks do
 * nothing, the checks that report "ok" return 1, the alloc/free debug
 * processing returns 0, and the slab counters read as 0.
 */
static inline void setup_object_debug(struct kmem_cache *s,
			struct page *page, void *object) {}

static inline int alloc_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr) { return 0; }

static inline int free_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr) { return 0; }

static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
			{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
			void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
					struct page *page) {}
static inline void remove_full(struct kmem_cache *s, struct page *page) {}
/* No debug flags are ever added to a cache. */
static inline unsigned long kmem_cache_flags(unsigned long objsize,
	unsigned long flags, const char *name,
	void (*ctor)(void *))
{
	return flags;
}
/* Compile-time constants standing in for the debug variables. */
#define slub_debug 0

#define disable_higher_order_debug 0

static inline unsigned long slabs_node(struct kmem_cache *s, int node)
							{ return 0; }
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
							{ return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects) {}

/* Returning 0 means "do not fail this allocation". */
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
							{ return 0; }

static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
		void *object) {}

static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1266
1267#endif
1268
1269
1270
1271
1272static inline struct page *alloc_slab_page(gfp_t flags, int node,
1273 struct kmem_cache_order_objects oo)
1274{
1275 int order = oo_order(oo);
1276
1277 flags |= __GFP_NOTRACK;
1278
1279 if (node == NUMA_NO_NODE)
1280 return alloc_pages(flags, order);
1281 else
1282 return alloc_pages_exact_node(node, flags, order);
1283}
1284
1285static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1286{
1287 struct page *page;
1288 struct kmem_cache_order_objects oo = s->oo;
1289 gfp_t alloc_gfp;
1290
1291 flags &= gfp_allowed_mask;
1292
1293 if (flags & __GFP_WAIT)
1294 local_irq_enable();
1295
1296 flags |= s->allocflags;
1297
1298
1299
1300
1301
1302 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1303
1304 page = alloc_slab_page(alloc_gfp, node, oo);
1305 if (unlikely(!page)) {
1306 oo = s->min;
1307
1308
1309
1310
1311 page = alloc_slab_page(flags, node, oo);
1312
1313 if (page)
1314 stat(s, ORDER_FALLBACK);
1315 }
1316
1317 if (flags & __GFP_WAIT)
1318 local_irq_disable();
1319
1320 if (!page)
1321 return NULL;
1322
1323 if (kmemcheck_enabled
1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1325 int pages = 1 << oo_order(oo);
1326
1327 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1328
1329
1330
1331
1332
1333 if (s->ctor)
1334 kmemcheck_mark_uninitialized_pages(page, pages);
1335 else
1336 kmemcheck_mark_unallocated_pages(page, pages);
1337 }
1338
1339 page->objects = oo_objects(oo);
1340 mod_zone_page_state(page_zone(page),
1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1342 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1343 1 << oo_order(oo));
1344
1345 return page;
1346}
1347
1348static void setup_object(struct kmem_cache *s, struct page *page,
1349 void *object)
1350{
1351 setup_object_debug(s, page, object);
1352 if (unlikely(s->ctor))
1353 s->ctor(object);
1354}
1355
1356static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1357{
1358 struct page *page;
1359 void *start;
1360 void *last;
1361 void *p;
1362
1363 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1364
1365 page = allocate_slab(s,
1366 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1367 if (!page)
1368 goto out;
1369
1370 inc_slabs_node(s, page_to_nid(page), page->objects);
1371 page->slab = s;
1372 __SetPageSlab(page);
1373
1374 start = page_address(page);
1375
1376 if (unlikely(s->flags & SLAB_POISON))
1377 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1378
1379 last = start;
1380 for_each_object(p, s, start, page->objects) {
1381 setup_object(s, page, last);
1382 set_freepointer(s, last, p);
1383 last = p;
1384 }
1385 setup_object(s, page, last);
1386 set_freepointer(s, last, NULL);
1387
1388 page->freelist = start;
1389 page->inuse = page->objects;
1390 page->frozen = 1;
1391out:
1392 return page;
1393}
1394
1395static void __free_slab(struct kmem_cache *s, struct page *page)
1396{
1397 int order = compound_order(page);
1398 int pages = 1 << order;
1399
1400 if (kmem_cache_debug(s)) {
1401 void *p;
1402
1403 slab_pad_check(s, page);
1404 for_each_object(p, s, page_address(page),
1405 page->objects)
1406 check_object(s, page, p, SLUB_RED_INACTIVE);
1407 }
1408
1409 kmemcheck_free_shadow(page, compound_order(page));
1410
1411 mod_zone_page_state(page_zone(page),
1412 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1413 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1414 -pages);
1415
1416 __ClearPageSlab(page);
1417 reset_page_mapcount(page);
1418 if (current->reclaim_state)
1419 current->reclaim_state->reclaimed_slab += pages;
1420 __free_pages(page, order);
1421}
1422
/*
 * True when page->lru is too small to hold a struct rcu_head.  free_slab()
 * then places the rcu_head in the reserved area at the end of the slab
 * instead of overlaying it on page->lru.
 */
#define need_reserve_slab_rcu						\
	(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1425
1426static void rcu_free_slab(struct rcu_head *h)
1427{
1428 struct page *page;
1429
1430 if (need_reserve_slab_rcu)
1431 page = virt_to_head_page(h);
1432 else
1433 page = container_of((struct list_head *)h, struct page, lru);
1434
1435 __free_slab(page->slab, page);
1436}
1437
1438static void free_slab(struct kmem_cache *s, struct page *page)
1439{
1440 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1441 struct rcu_head *head;
1442
1443 if (need_reserve_slab_rcu) {
1444 int order = compound_order(page);
1445 int offset = (PAGE_SIZE << order) - s->reserved;
1446
1447 VM_BUG_ON(s->reserved != sizeof(*head));
1448 head = page_address(page) + offset;
1449 } else {
1450
1451
1452
1453 head = (void *)&page->lru;
1454 }
1455
1456 call_rcu(head, rcu_free_slab);
1457 } else
1458 __free_slab(s, page);
1459}
1460
1461static void discard_slab(struct kmem_cache *s, struct page *page)
1462{
1463 dec_slabs_node(s, page_to_nid(page), page->objects);
1464 free_slab(s, page);
1465}
1466
1467
1468
1469
1470
1471
1472static inline void add_partial(struct kmem_cache_node *n,
1473 struct page *page, int tail)
1474{
1475 n->nr_partial++;
1476 if (tail == DEACTIVATE_TO_TAIL)
1477 list_add_tail(&page->lru, &n->partial);
1478 else
1479 list_add(&page->lru, &n->partial);
1480}
1481
1482
1483
1484
1485static inline void remove_partial(struct kmem_cache_node *n,
1486 struct page *page)
1487{
1488 list_del(&page->lru);
1489 n->nr_partial--;
1490}
1491
1492
1493
1494
1495
1496
1497
1498
1499
/*
 * Freeze a slab that sits on the node partial list and take it off that
 * list.
 *
 * @mode: non-zero  - claim every free object: the slab is marked fully in
 *                    use (inuse = objects) and its freelist is set to NULL.
 *        zero      - only freeze the slab; the freelist stays in the page.
 *
 * Returns the freelist pointer observed when the cmpxchg succeeded (NULL
 * if the slab had no free objects at that point).
 *
 * The only caller in view, get_partial_node(), holds n->list_lock.
 */
static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
		int mode)
{
	void *freelist;
	unsigned long counters;
	struct page new;

	/*
	 * Retry until freelist and counters are replaced atomically as a
	 * pair; `new` is only used as scratch space to build the new
	 * counters word.
	 */
	do {
		freelist = page->freelist;
		counters = page->counters;
		new.counters = counters;
		if (mode) {
			new.inuse = page->objects;
			new.freelist = NULL;
		} else {
			new.freelist = freelist;
		}

		/* A slab on the partial list must not already be frozen. */
		VM_BUG_ON(new.frozen);
		new.frozen = 1;

	} while (!__cmpxchg_double_slab(s, page,
			freelist, counters,
			new.freelist, new.counters,
			"lock and freeze"));

	remove_partial(n, page);
	return freelist;
}
1535
1536static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1537
1538
1539
1540
1541static void *get_partial_node(struct kmem_cache *s,
1542 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1543{
1544 struct page *page, *page2;
1545 void *object = NULL;
1546
1547
1548
1549
1550
1551
1552
1553 if (!n || !n->nr_partial)
1554 return NULL;
1555
1556 spin_lock(&n->list_lock);
1557 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1558 void *t = acquire_slab(s, n, page, object == NULL);
1559 int available;
1560
1561 if (!t)
1562 break;
1563
1564 if (!object) {
1565 c->page = page;
1566 c->node = page_to_nid(page);
1567 stat(s, ALLOC_FROM_PARTIAL);
1568 object = t;
1569 available = page->objects - page->inuse;
1570 } else {
1571 available = put_cpu_partial(s, page, 0);
1572 stat(s, CPU_PARTIAL_NODE);
1573 }
1574 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1575 break;
1576
1577 }
1578 spin_unlock(&n->list_lock);
1579 return object;
1580}
1581
1582
1583
1584
/*
 * Look for a partial slab on any node allowed by the current memory
 * policy and cpuset.  Without CONFIG_NUMA this always returns NULL.
 *
 * Returns a freelist obtained via get_partial_node(), or NULL.
 */
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
		struct kmem_cache_cpu *c)
{
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *object;
	unsigned int cpuset_mems_cookie;

	/*
	 * remote_node_defrag_ratio throttles how often other nodes are
	 * searched: a ratio of 0 disables the search, otherwise
	 * get_cycles() % 1024 is used as a cheap pseudo-random draw and the
	 * search is skipped when the draw exceeds the ratio.
	 */
	if (!s->remote_node_defrag_ratio ||
			get_cycles() % 1024 > s->remote_node_defrag_ratio)
		return NULL;

	/*
	 * Repeat the walk when put_mems_allowed() reports failure for the
	 * cookie (presumably mems_allowed changed during the walk - see the
	 * cpuset API).
	 */
	do {
		cpuset_mems_cookie = get_mems_allowed();
		zonelist = node_zonelist(slab_node(current->mempolicy), flags);
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
			struct kmem_cache_node *n;

			n = get_node(s, zone_to_nid(zone));

			/*
			 * Only take from nodes that hold more than
			 * min_partial partial slabs.
			 */
			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
					n->nr_partial > s->min_partial) {
				object = get_partial_node(s, n, c);
				if (object) {
					/*
					 * Success: close the cookie section
					 * and return; its result is ignored
					 * on purpose, the object is used
					 * either way.
					 */
					put_mems_allowed(cpuset_mems_cookie);
					return object;
				}
			}
		}
	} while (!put_mems_allowed(cpuset_mems_cookie));
#endif
	return NULL;
}
1647
1648
1649
1650
1651static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1652 struct kmem_cache_cpu *c)
1653{
1654 void *object;
1655 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1656
1657 object = get_partial_node(s, get_node(s, searchnode), c);
1658 if (object || node != NUMA_NO_NODE)
1659 return object;
1660
1661 return get_any_partial(s, flags, c);
1662}
1663
/*
 * Transaction ids (tid) tag every per-cpu freelist operation.
 *
 * With CONFIG_PREEMPT the low part of a tid carries the cpu number
 * (tids start at the cpu number and advance in steps of a power of two
 * that is >= CONFIG_NR_CPUS), so tids of different cpus never collide.
 * Without preemption the step is simply 1.
 */
#ifdef CONFIG_PREEMPT
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
#define TID_STEP 1
#endif

/* Advance a tid to the next transaction on the same cpu. */
static inline unsigned long next_tid(unsigned long tid)
{
	unsigned long bumped = tid;

	bumped += TID_STEP;
	return bumped;
}

/* Extract the cpu part of a tid (the remainder modulo TID_STEP). */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned long cpu_part = tid % TID_STEP;

	return cpu_part;
}

/* Extract the event counter part of a tid. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event = tid / TID_STEP;

	return event;
}

/* Initial tid for a cpu: the cpu number itself, i.e. event 0. */
static inline unsigned int init_tid(int cpu)
{
	unsigned int first = cpu;

	return first;
}
1698
/*
 * Record a failed per-cpu cmpxchg_double on the fast path.
 *
 * Always bumps the CMPXCHG_DOUBLE_CPU_FAIL statistic.  When
 * SLUB_DEBUG_CMPXCHG is defined it additionally prints why the tid no
 * longer matched.
 *
 * @n:   name of the calling function, used as message prefix
 * @tid: the transaction id the caller sampled before the cmpxchg
 */
static inline void note_cmpxchg_failure(const char *n,
		const struct kmem_cache *s, unsigned long tid)
{
#ifdef SLUB_DEBUG_CMPXCHG
	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);

	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);

	/*
	 * Note the dangling `else` below: with CONFIG_PREEMPT the three
	 * cases form one if / else-if / else chain; without it the chain
	 * starts at the event comparison.
	 */
#ifdef CONFIG_PREEMPT
	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
		printk("due to cpu change %d -> %d\n",
			tid_to_cpu(tid), tid_to_cpu(actual_tid));
	else
#endif
	if (tid_to_event(tid) != tid_to_event(actual_tid))
		printk("due to cpu running other code. Event %ld->%ld\n",
			tid_to_event(tid), tid_to_event(actual_tid));
	else
		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
			actual_tid, tid, next_tid(tid));
#endif
	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
}
1722
1723void init_kmem_cache_cpus(struct kmem_cache *s)
1724{
1725 int cpu;
1726
1727 for_each_possible_cpu(cpu)
1728 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1729}
1730
1731
1732
1733
/*
 * Detach the current cpu slab (c->page) from the per-cpu structure:
 * hand the remaining per-cpu freelist back to the page, unfreeze the
 * page and put it on the list matching its new state (partial, full for
 * debug caches) or free it when it is empty and the node already has
 * more than min_partial partial slabs.
 *
 * c->page must be non-NULL on entry; on return c->page and c->freelist
 * are NULL and c->tid has been advanced.
 */
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
	struct page *page = c->page;
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;	/* non-zero once n->list_lock has been taken */
	enum slab_modes l = M_NONE, m = M_NONE;	/* l: current list, m: target */
	void *freelist;
	void *nextfree;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;

	/*
	 * A non-empty page freelist while the slab is the cpu slab means
	 * objects were freed to the page behind this cpu's back; such slabs
	 * are queued at the tail of the partial list.
	 */
	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = DEACTIVATE_TO_TAIL;
	}

	/* Invalidate in-flight fast-path transactions and detach the slab. */
	c->tid = next_tid(c->tid);
	c->page = NULL;
	freelist = c->freelist;
	c->freelist = NULL;

	/*
	 * Stage one: push the per-cpu free objects back onto the page
	 * freelist one at a time, keeping the slab frozen.  The loop stops
	 * with the LAST object still in `freelist` (its free pointer is
	 * NULL); that one is returned together with the unfreeze below.
	 */
	while (freelist && (nextfree = get_freepointer(s, freelist))) {
		void *prior;
		unsigned long counters;

		do {
			prior = page->freelist;
			counters = page->counters;
			set_freepointer(s, freelist, prior);
			new.counters = counters;
			new.inuse--;
			VM_BUG_ON(!new.frozen);

		} while (!__cmpxchg_double_slab(s, page,
			prior, counters,
			freelist, new.counters,
			"drain percpu freelist"));

		freelist = nextfree;
	}

	/*
	 * Stage two: unfreeze the slab.  The target list is decided from
	 * the would-be new state, the list manipulation is done under
	 * n->list_lock, and only then is the cmpxchg attempted.  If it
	 * fails everything is recomputed; `l` remembers which list the page
	 * was put on by the previous attempt so that it can be moved.
	 */
redo:

	old.freelist = page->freelist;
	old.counters = page->counters;
	VM_BUG_ON(!old.frozen);

	/* Determine target state of the slab */
	new.counters = old.counters;
	if (freelist) {
		/* Return the last per-cpu object along with the unfreeze. */
		new.inuse--;
		set_freepointer(s, freelist, old.freelist);
		new.freelist = freelist;
	} else
		new.freelist = old.freelist;

	new.frozen = 0;

	if (!new.inuse && n->nr_partial > s->min_partial)
		m = M_FREE;
	else if (new.freelist) {
		m = M_PARTIAL;
		if (!lock) {
			lock = 1;
			/*
			 * Taken before the cmpxchg so the page is already on
			 * the partial list when it becomes visible as
			 * unfrozen.
			 */
			spin_lock(&n->list_lock);
		}
	} else {
		m = M_FULL;
		if (kmem_cache_debug(s) && !lock) {
			lock = 1;
			/*
			 * Full lists are only touched under the lock when
			 * the cache has debugging enabled.
			 */
			spin_lock(&n->list_lock);
		}
	}

	if (l != m) {

		/* Undo the list placement of a previous, failed attempt. */
		if (l == M_PARTIAL)

			remove_partial(n, page);

		else if (l == M_FULL)

			remove_full(s, page);

		if (m == M_PARTIAL) {

			add_partial(n, page, tail);
			/* tail doubles as the statistics item to count. */
			stat(s, tail);

		} else if (m == M_FULL) {

			stat(s, DEACTIVATE_FULL);
			add_full(s, n, page);

		}
	}

	l = m;
	if (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"))
		goto redo;

	if (lock)
		spin_unlock(&n->list_lock);

	if (m == M_FREE) {
		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1881
1882
1883static void unfreeze_partials(struct kmem_cache *s)
1884{
1885 struct kmem_cache_node *n = NULL;
1886 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1887 struct page *page, *discard_page = NULL;
1888
1889 while ((page = c->partial)) {
1890 enum slab_modes { M_PARTIAL, M_FREE };
1891 enum slab_modes l, m;
1892 struct page new;
1893 struct page old;
1894
1895 c->partial = page->next;
1896 l = M_FREE;
1897
1898 do {
1899
1900 old.freelist = page->freelist;
1901 old.counters = page->counters;
1902 VM_BUG_ON(!old.frozen);
1903
1904 new.counters = old.counters;
1905 new.freelist = old.freelist;
1906
1907 new.frozen = 0;
1908
1909 if (!new.inuse && (!n || n->nr_partial > s->min_partial))
1910 m = M_FREE;
1911 else {
1912 struct kmem_cache_node *n2 = get_node(s,
1913 page_to_nid(page));
1914
1915 m = M_PARTIAL;
1916 if (n != n2) {
1917 if (n)
1918 spin_unlock(&n->list_lock);
1919
1920 n = n2;
1921 spin_lock(&n->list_lock);
1922 }
1923 }
1924
1925 if (l != m) {
1926 if (l == M_PARTIAL) {
1927 remove_partial(n, page);
1928 stat(s, FREE_REMOVE_PARTIAL);
1929 } else {
1930 add_partial(n, page,
1931 DEACTIVATE_TO_TAIL);
1932 stat(s, FREE_ADD_PARTIAL);
1933 }
1934
1935 l = m;
1936 }
1937
1938 } while (!cmpxchg_double_slab(s, page,
1939 old.freelist, old.counters,
1940 new.freelist, new.counters,
1941 "unfreezing slab"));
1942
1943 if (m == M_FREE) {
1944 page->next = discard_page;
1945 discard_page = page;
1946 }
1947 }
1948
1949 if (n)
1950 spin_unlock(&n->list_lock);
1951
1952 while (discard_page) {
1953 page = discard_page;
1954 discard_page = discard_page->next;
1955
1956 stat(s, DEACTIVATE_EMPTY);
1957 discard_slab(s, page);
1958 stat(s, FREE_SLAB);
1959 }
1960}
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1972{
1973 struct page *oldpage;
1974 int pages;
1975 int pobjects;
1976
1977 do {
1978 pages = 0;
1979 pobjects = 0;
1980 oldpage = this_cpu_read(s->cpu_slab->partial);
1981
1982 if (oldpage) {
1983 pobjects = oldpage->pobjects;
1984 pages = oldpage->pages;
1985 if (drain && pobjects > s->cpu_partial) {
1986 unsigned long flags;
1987
1988
1989
1990
1991 local_irq_save(flags);
1992 unfreeze_partials(s);
1993 local_irq_restore(flags);
1994 pobjects = 0;
1995 pages = 0;
1996 stat(s, CPU_PARTIAL_DRAIN);
1997 }
1998 }
1999
2000 pages++;
2001 pobjects += page->objects - page->inuse;
2002
2003 page->pages = pages;
2004 page->pobjects = pobjects;
2005 page->next = oldpage;
2006
2007 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
2008 return pobjects;
2009}
2010
2011static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2012{
2013 stat(s, CPUSLAB_FLUSH);
2014 deactivate_slab(s, c);
2015}
2016
2017
2018
2019
2020
2021
2022static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2023{
2024 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2025
2026 if (likely(c)) {
2027 if (c->page)
2028 flush_slab(s, c);
2029
2030 unfreeze_partials(s);
2031 }
2032}
2033
/*
 * Per-cpu callback used by flush_all(): flush the slabs of the cpu this
 * runs on.  @d is the struct kmem_cache to flush.
 */
static void flush_cpu_slab(void *d)
{
	struct kmem_cache *cache = d;
	int this_cpu = smp_processor_id();

	__flush_cpu_slab(cache, this_cpu);
}
2040
2041static bool has_cpu_slab(int cpu, void *info)
2042{
2043 struct kmem_cache *s = info;
2044 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2045
2046 return c->page || c->partial;
2047}
2048
/*
 * Flush the per-cpu slabs of @s on every cpu for which has_cpu_slab()
 * reports something to flush.  The call waits for the remote handlers to
 * finish (wait argument is 1).
 */
static void flush_all(struct kmem_cache *s)
{
	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
}
2053
2054
2055
2056
2057
/*
 * Check whether the cpu slab may serve an allocation for @node.
 * Returns 1 if it may (always the case without CONFIG_NUMA or when
 * node == NUMA_NO_NODE), 0 otherwise.
 */
static inline int node_match(struct kmem_cache_cpu *c, int node)
{
#ifdef CONFIG_NUMA
	return node == NUMA_NO_NODE || c->node == node;
#else
	return 1;
#endif
}
2066
/* Number of free objects in a slab (total objects minus objects in use). */
static int count_free(struct page *page)
{
	return page->objects - page->inuse;
}
2071
2072static unsigned long count_partial(struct kmem_cache_node *n,
2073 int (*get_count)(struct page *))
2074{
2075 unsigned long flags;
2076 unsigned long x = 0;
2077 struct page *page;
2078
2079 spin_lock_irqsave(&n->list_lock, flags);
2080 list_for_each_entry(page, &n->partial, lru)
2081 x += get_count(page);
2082 spin_unlock_irqrestore(&n->list_lock, flags);
2083 return x;
2084}
2085
/*
 * Total number of objects tracked for a node.  The counter only exists
 * with CONFIG_SLUB_DEBUG; otherwise 0 is reported.
 */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
#ifdef CONFIG_SLUB_DEBUG
	return atomic_long_read(&n->total_objects);
#else
	return 0;
#endif
}
2094
2095static noinline void
2096slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2097{
2098 int node;
2099
2100 printk(KERN_WARNING
2101 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2102 nid, gfpflags);
2103 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2104 "default order: %d, min order: %d\n", s->name, s->objsize,
2105 s->size, oo_order(s->oo), oo_order(s->min));
2106
2107 if (oo_order(s->min) > get_order(s->objsize))
2108 printk(KERN_WARNING " %s debugging increased min order, use "
2109 "slub_debug=O to disable.\n", s->name);
2110
2111 for_each_online_node(node) {
2112 struct kmem_cache_node *n = get_node(s, node);
2113 unsigned long nr_slabs;
2114 unsigned long nr_objs;
2115 unsigned long nr_free;
2116
2117 if (!n)
2118 continue;
2119
2120 nr_free = count_partial(n, count_free);
2121 nr_slabs = node_nr_slabs(n);
2122 nr_objs = node_nr_objs(n);
2123
2124 printk(KERN_WARNING
2125 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2126 node, nr_slabs, nr_objs, nr_free);
2127 }
2128}
2129
2130static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2131 int node, struct kmem_cache_cpu **pc)
2132{
2133 void *object;
2134 struct kmem_cache_cpu *c;
2135 struct page *page = new_slab(s, flags, node);
2136
2137 if (page) {
2138 c = __this_cpu_ptr(s->cpu_slab);
2139 if (c->page)
2140 flush_slab(s, c);
2141
2142
2143
2144
2145
2146 object = page->freelist;
2147 page->freelist = NULL;
2148
2149 stat(s, ALLOC_SLAB);
2150 c->node = page_to_nid(page);
2151 c->page = page;
2152 *pc = c;
2153 } else
2154 object = NULL;
2155
2156 return object;
2157}
2158
2159
2160
2161
2162
2163
2164
2165
2166
/*
 * Take over the whole page freelist of a frozen slab for per-cpu use.
 *
 * The page freelist is set to NULL and the slab is marked fully in use.
 * If the freelist was already empty the slab is unfrozen in the same
 * atomic step (frozen = freelist != NULL) and NULL is returned.
 *
 * Returns the freelist that was detached from the page, or NULL.
 */
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
	struct page new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = page->freelist;
		counters = page->counters;
		new.counters = counters;
		/* Only valid on a slab that is currently frozen. */
		VM_BUG_ON(!new.frozen);

		new.inuse = page->objects;
		new.frozen = freelist != NULL;

	} while (!cmpxchg_double_slab(s, page,
		freelist, counters,
		NULL, new.counters,
		"get_freelist"));

	return freelist;
}
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
/*
 * Slow path of slab_alloc(): entered when the per-cpu freelist is empty
 * or the cpu slab does not match the requested node.
 *
 * Runs with interrupts disabled.  Objects are looked for, in order, in
 * the cpu slab's page freelist, the per-cpu partial list, the node
 * partial lists (get_partial()) and finally a newly allocated slab.
 *
 * @addr: caller address handed to alloc_debug_processing()
 * @c:    per-cpu structure sampled by the caller; re-read under
 *        CONFIG_PREEMPT once interrupts are off
 *
 * Returns the allocated object or NULL on failure.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void **object;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPT
	/*
	 * The task may have been moved to another cpu before interrupts
	 * were disabled, so fetch the per-cpu pointer again.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	if (!c->page)
		goto new_slab;
redo:
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, c);
		goto new_slab;
	}

	/* The per-cpu freelist may have been refilled in the meantime. */
	object = c->freelist;
	if (object)
		goto load_freelist;

	stat(s, ALLOC_SLOWPATH);

	object = get_freelist(s, c->page);

	if (!object) {
		/* get_freelist() already unfroze the exhausted slab. */
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
	}

	stat(s, ALLOC_REFILL);

load_freelist:
	/* Hand out the first object, keep the rest as per-cpu freelist. */
	c->freelist = get_freepointer(s, object);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return object;

new_slab:

	if (c->partial) {
		/* Promote the first per-cpu partial slab to cpu slab. */
		c->page = c->partial;
		c->partial = c->page->next;
		c->node = page_to_nid(c->page);
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	/* Then try the partial lists of the nodes. */
	object = get_partial(s, gfpflags, node, c);

	if (unlikely(!object)) {

		/* Last resort: a brand new slab (may update c). */
		object = new_slab_objects(s, gfpflags, node, &c);

		if (unlikely(!object)) {
			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
				slab_out_of_memory(s, gfpflags, node);

			local_irq_restore(flags);
			return NULL;
		}
	}

	if (likely(!kmem_cache_debug(s)))
		goto load_freelist;

	/* Only reached for caches with debugging enabled. */
	if (!alloc_debug_processing(s, c->page, object, addr))
		goto new_slab;	/* debug checks failed, try another slab */

	/*
	 * Debug caches do not keep a cpu slab: give the remaining objects
	 * back right away and return just this one object.
	 */
	c->freelist = get_freepointer(s, object);
	deactivate_slab(s, c);
	c->node = NUMA_NO_NODE;
	local_irq_restore(flags);
	return object;
}
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
/*
 * Allocation entry point shared by all kmem_cache_alloc*() variants.
 *
 * Fast path: pop the first object off the per-cpu freelist with a single
 * this_cpu_cmpxchg_double() on (freelist, tid), without disabling
 * interrupts.  Everything else is delegated to __slab_alloc().
 *
 * @node: requested node or NUMA_NO_NODE
 * @addr: caller address, passed on to the slow path
 *
 * Returns the object (zeroed when __GFP_ZERO is set) or NULL.
 */
static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	if (slab_pre_alloc_hook(s, gfpflags))
		return NULL;

redo:

	/*
	 * Sample the per-cpu structure without protection.  If the data
	 * read below becomes stale (migration, interrupt), the tid
	 * comparison in the cmpxchg fails and the operation is retried.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * Read tid before freelist; barrier() keeps the compiler from
	 * reordering the two loads.
	 */
	tid = c->tid;
	barrier();

	object = c->freelist;
	if (unlikely(!object || !node_match(c, node)))

		object = __slab_alloc(s, gfpflags, node, addr, c);

	else {
		void *next_object = get_freepointer_safe(s, object);

		/*
		 * Replace (freelist, tid) with (next object, next tid) only
		 * if both still hold the sampled values on the cpu this is
		 * executed on.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				next_object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		prefetch_freepointer(s, next_object);
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->objsize);

	slab_post_alloc_hook(s, gfpflags, object);

	return object;
}
2373
2374void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2375{
2376 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2377
2378 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
2379
2380 return ret;
2381}
2382EXPORT_SYMBOL(kmem_cache_alloc);
2383
2384#ifdef CONFIG_TRACING
2385void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2386{
2387 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2388 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2389 return ret;
2390}
2391EXPORT_SYMBOL(kmem_cache_alloc_trace);
2392
2393void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2394{
2395 void *ret = kmalloc_order(size, flags, order);
2396 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2397 return ret;
2398}
2399EXPORT_SYMBOL(kmalloc_order_trace);
2400#endif
2401
2402#ifdef CONFIG_NUMA
2403void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2404{
2405 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2406
2407 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2408 s->objsize, s->size, gfpflags, node);
2409
2410 return ret;
2411}
2412EXPORT_SYMBOL(kmem_cache_alloc_node);
2413
2414#ifdef CONFIG_TRACING
2415void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2416 gfp_t gfpflags,
2417 int node, size_t size)
2418{
2419 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2420
2421 trace_kmalloc_node(_RET_IP_, ret,
2422 size, s->size, gfpflags, node);
2423 return ret;
2424}
2425EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2426#endif
2427#endif
2428
2429
2430
2431
2432
2433
2434
2435
2436
/*
 * Slow path of slab_free(): the object does not belong to the current
 * cpu slab, so it is pushed onto the page freelist directly.
 *
 * Depending on the state change this causes, the slab is additionally
 * frozen and moved to the per-cpu partial list, added to the node
 * partial list, or discarded when it became empty.
 *
 * @x:    object to free
 * @addr: caller address handed to free_debug_processing()
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	int inuse;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;	/* non-NULL once list_lock is held */
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
		return;

	do {
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		/*
		 * List handling is only needed when an unfrozen slab becomes
		 * empty (!new.inuse) or stops being full (!prior).  The !n
		 * test makes sure the lock is taken at most once across
		 * retries of this loop.
		 */
		if ((!new.inuse || !prior) && !was_frozen && !n) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * The slab was full and is on no list:
				 * freeze it so it can go onto the per-cpu
				 * partial list without taking list_lock.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

				n = get_node(s, page_to_nid(page));
				/*
				 * Taken before the cmpxchg so that nobody
				 * else can manipulate the slab's list
				 * membership once the new state is visible.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}
		inuse = new.inuse;

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * No lock was taken.  If this free froze the slab, hand it
		 * to the per-cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}

		/* The slab is owned by some cpu; nothing more to do. */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}

	/*
	 * list_lock is held from here on.  was_frozen may still be set
	 * when the lock was taken during an earlier loop iteration.
	 */
	if (was_frozen)
		stat(s, FREE_FROZEN);
	else {
		if (unlikely(!inuse && n->nr_partial > s->min_partial))
			goto slab_empty;

		/*
		 * The slab was full before this free: move it from the
		 * full list to the partial list.
		 */
		if (unlikely(!prior)) {
			remove_full(s, page);
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/* The slab had free objects before, so it is on the partial list. */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* The slab was full (single-object slab). */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
/*
 * Free entry point.
 *
 * Fast path: when the object belongs to the current cpu slab it is
 * pushed onto the per-cpu freelist with one this_cpu_cmpxchg_double() on
 * (freelist, tid).  Otherwise __slab_free() handles it.
 *
 * @page: head page of the slab containing @x
 * @addr: caller address, passed on to the slow path
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * Sample the per-cpu data unprotected; the tid check in the
	 * cmpxchg below detects if it went stale and the free is retried.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
	barrier();

	if (likely(page == c->page)) {
		/* Link the object in front of the current per-cpu freelist. */
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);

}
2599
2600void kmem_cache_free(struct kmem_cache *s, void *x)
2601{
2602 struct page *page;
2603
2604 page = virt_to_head_page(x);
2605
2606 slab_free(s, page, x, _RET_IP_);
2607
2608 trace_kmem_cache_free(_RET_IP_, x);
2609}
2610EXPORT_SYMBOL(kmem_cache_free);
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
/*
 * Tunables for the slab order calculation:
 *  slub_min_order   - lowest page order slab_order() starts from
 *  slub_max_order   - highest order tried first by calculate_order()
 *  slub_min_objects - desired minimum number of objects per slab;
 *                     0 lets calculate_order() derive it from nr_cpu_ids
 */
static int slub_min_order;
static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static int slub_min_objects;

/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * switch that disables merging of slab caches - confirm at its users.
 */
static int slub_nomerge;
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666static inline int slab_order(int size, int min_objects,
2667 int max_order, int fract_leftover, int reserved)
2668{
2669 int order;
2670 int rem;
2671 int min_order = slub_min_order;
2672
2673 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2674 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2675
2676 for (order = max(min_order,
2677 fls(min_objects * size - 1) - PAGE_SHIFT);
2678 order <= max_order; order++) {
2679
2680 unsigned long slab_size = PAGE_SIZE << order;
2681
2682 if (slab_size < min_objects * size + reserved)
2683 continue;
2684
2685 rem = (slab_size - reserved) % size;
2686
2687 if (rem <= slab_size / fract_leftover)
2688 break;
2689
2690 }
2691
2692 return order;
2693}
2694
2695static inline int calculate_order(int size, int reserved)
2696{
2697 int order;
2698 int min_objects;
2699 int fraction;
2700 int max_objects;
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710 min_objects = slub_min_objects;
2711 if (!min_objects)
2712 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2713 max_objects = order_objects(slub_max_order, size, reserved);
2714 min_objects = min(min_objects, max_objects);
2715
2716 while (min_objects > 1) {
2717 fraction = 16;
2718 while (fraction >= 4) {
2719 order = slab_order(size, min_objects,
2720 slub_max_order, fraction, reserved);
2721 if (order <= slub_max_order)
2722 return order;
2723 fraction /= 2;
2724 }
2725 min_objects--;
2726 }
2727
2728
2729
2730
2731
2732 order = slab_order(size, 1, slub_max_order, 1, reserved);
2733 if (order <= slub_max_order)
2734 return order;
2735
2736
2737
2738
2739 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2740 if (order < MAX_ORDER)
2741 return order;
2742 return -ENOSYS;
2743}
2744
2745
2746
2747
2748static unsigned long calculate_alignment(unsigned long flags,
2749 unsigned long align, unsigned long size)
2750{
2751
2752
2753
2754
2755
2756
2757
2758 if (flags & SLAB_HWCACHE_ALIGN) {
2759 unsigned long ralign = cache_line_size();
2760 while (size <= ralign / 2)
2761 ralign /= 2;
2762 align = max(align, ralign);
2763 }
2764
2765 if (align < ARCH_SLAB_MINALIGN)
2766 align = ARCH_SLAB_MINALIGN;
2767
2768 return ALIGN(align, sizeof(void *));
2769}
2770
2771static void
2772init_kmem_cache_node(struct kmem_cache_node *n)
2773{
2774 n->nr_partial = 0;
2775 spin_lock_init(&n->list_lock);
2776 INIT_LIST_HEAD(&n->partial);
2777#ifdef CONFIG_SLUB_DEBUG
2778 atomic_long_set(&n->nr_slabs, 0);
2779 atomic_long_set(&n->total_objects, 0);
2780 INIT_LIST_HEAD(&n->full);
2781#endif
2782}
2783
2784static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2785{
2786 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2787 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2788
2789
2790
2791
2792
2793 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2794 2 * sizeof(void *));
2795
2796 if (!s->cpu_slab)
2797 return 0;
2798
2799 init_kmem_cache_cpus(s);
2800
2801 return 1;
2802}
2803
/* The cache that struct kmem_cache_node objects are allocated from. */
static struct kmem_cache *kmem_cache_node;
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
/*
 * Bootstrap helper: create the kmem_cache_node of the kmem_cache_node
 * cache itself for @node.
 *
 * Used by init_kmem_cache_nodes() while slab_state == DOWN, i.e. before
 * kmem_cache_alloc_node() can be used for this.  The first object is
 * carved out of a freshly allocated slab by hand and the slab is then
 * put on the new node's partial list.
 */
static void early_kmem_cache_node_alloc(int node)
{
	struct page *page;
	struct kmem_cache_node *n;

	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));

	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);

	BUG_ON(!page);
	if (page_to_nid(page) != node) {
		/* Memory came from another node; continue regardless. */
		printk(KERN_ERR "SLUB: Unable to allocate memory from "
				"node %d\n", node);
		printk(KERN_ERR "SLUB: Allocating a useless per node structure "
				"in order to be able to continue\n");
	}

	/* Manually take the first object off the new slab's freelist. */
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse = 1;
	page->frozen = 0;
	/* Publish the node structure before the accounting below uses it. */
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
	init_tracking(kmem_cache_node, n);
#endif
	init_kmem_cache_node(n);
	inc_slabs_node(kmem_cache_node, node, page->objects);

	add_partial(n, page, DEACTIVATE_TO_HEAD);
}
2847
2848static void free_kmem_cache_nodes(struct kmem_cache *s)
2849{
2850 int node;
2851
2852 for_each_node_state(node, N_NORMAL_MEMORY) {
2853 struct kmem_cache_node *n = s->node[node];
2854
2855 if (n)
2856 kmem_cache_free(kmem_cache_node, n);
2857
2858 s->node[node] = NULL;
2859 }
2860}
2861
2862static int init_kmem_cache_nodes(struct kmem_cache *s)
2863{
2864 int node;
2865
2866 for_each_node_state(node, N_NORMAL_MEMORY) {
2867 struct kmem_cache_node *n;
2868
2869 if (slab_state == DOWN) {
2870 early_kmem_cache_node_alloc(node);
2871 continue;
2872 }
2873 n = kmem_cache_alloc_node(kmem_cache_node,
2874 GFP_KERNEL, node);
2875
2876 if (!n) {
2877 free_kmem_cache_nodes(s);
2878 return 0;
2879 }
2880
2881 s->node[node] = n;
2882 init_kmem_cache_node(n);
2883 }
2884 return 1;
2885}
2886
2887static void set_min_partial(struct kmem_cache *s, unsigned long min)
2888{
2889 if (min < MIN_PARTIAL)
2890 min = MIN_PARTIAL;
2891 else if (min > MAX_PARTIAL)
2892 min = MAX_PARTIAL;
2893 s->min_partial = min;
2894}
2895
2896
2897
2898
2899
/*
 * Compute the object layout and slab order of a cache from s->objsize,
 * s->align, s->flags, s->ctor and s->reserved.
 *
 * Sets s->inuse, s->offset (when the free pointer must live outside the
 * object), s->size, s->align, s->allocflags, s->oo, s->min and possibly
 * s->max and the __OBJECT_POISON flag.
 *
 * @forced_order: page order to use, or a negative value to have it
 *                calculated
 *
 * Returns non-zero if at least one object fits into a slab, 0 otherwise.
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	unsigned long flags = s->flags;
	unsigned long size = s->objsize;
	unsigned long align = s->align;
	int order;

	/*
	 * Round up to a multiple of the word size so the free pointer
	 * that may follow is word aligned.
	 */
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * The object itself can only be poisoned when its contents do not
	 * have to survive a free: no RCU freeing and no constructor.
	 */
	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;

	/*
	 * If the alignment above left no gap after the object, add one
	 * word to hold the red zone.
	 */
	if ((flags & SLAB_RED_ZONE) && size == s->objsize)
		size += sizeof(void *);
#endif

	/* Bytes used by the object (and red zone); metadata follows. */
	s->inuse = size;

	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * The object contents must stay intact while it is free
		 * (RCU, poison pattern, constructed state), so the free
		 * pointer is placed behind the object instead of inside it.
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	if (flags & SLAB_STORE_USER)
		/* Room for two struct track records (see struct track users). */
		size += 2 * sizeof(struct track);

	if (flags & SLAB_RED_ZONE)
		/* One more word of padding at the end of the object. */
		size += sizeof(void *);
#endif

	/* Determine the alignment; it is based on the bare object size. */
	align = calculate_alignment(flags, align, s->objsize);
	s->align = align;

	/*
	 * Round the full per-object size up to the alignment so that every
	 * object in the slab starts aligned.
	 */
	size = ALIGN(size, align);
	s->size = size;
	if (forced_order >= 0)
		order = forced_order;
	else
		order = calculate_order(size, s->reserved);

	if (order < 0)
		return 0;

	s->allocflags = 0;
	if (order)
		s->allocflags |= __GFP_COMP;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= SLUB_DMA;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * oo: preferred order/object count, min: smallest order that
	 * holds one object, max: largest configuration seen so far.
	 */
	s->oo = oo_make(order, size, s->reserved);
	s->min = oo_make(get_order(size), size, s->reserved);
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;

	return !!oo_objects(s->oo);

}
3019
/*
 * Initialise the kmem_cache structure @s for a new cache.
 *
 * @name:  cache name (the pointer is stored, not copied)
 * @size:  object size
 * @align: requested alignment
 * @flags: SLAB_* creation flags
 * @ctor:  optional object constructor
 *
 * Returns 1 on success and 0 on failure.  With SLAB_PANIC set in @flags
 * a failure panics instead of returning.
 */
static int kmem_cache_open(struct kmem_cache *s,
		const char *name, size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *))
{
	memset(s, 0, kmem_size);
	s->name = name;
	s->ctor = ctor;
	s->objsize = size;
	s->align = align;
	s->flags = kmem_cache_flags(size, flags, name, ctor);
	s->reserved = 0;

	/* Reserve room at the end of each slab for the rcu_head if needed. */
	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
		s->reserved = sizeof(struct rcu_head);

	if (!calculate_sizes(s, -1))
		goto error;
	if (disable_higher_order_debug) {
		/*
		 * If the debug metadata pushed the object into a higher
		 * page order, drop the metadata flags and recompute.
		 */
		if (get_order(s->size) > get_order(s->objsize)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}

#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
		/* Allow the lockless cmpxchg_double paths for this cache. */
		s->flags |= __CMPXCHG_DOUBLE;
#endif

	/*
	 * The number of partial slabs kept per node grows with the
	 * logarithm of the object size (clamped by set_min_partial()).
	 */
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * cpu_partial limits the free objects kept on the per-cpu partial
	 * list (see put_cpu_partial()).  Larger objects get a smaller
	 * limit; debug caches do not use per-cpu partial slabs at all.
	 */
	if (kmem_cache_debug(s))
		s->cpu_partial = 0;
	else if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;

	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif
	if (!init_kmem_cache_nodes(s))
		goto error;

	if (alloc_kmem_cache_cpus(s))
		return 1;

	/* Per-cpu allocation failed: undo the node structures. */
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)size, s->size, oo_order(s->oo),
			s->offset, flags);
	return 0;
}
3111
3112
3113
3114
/*
 * Return the object size the cache was created with (s->objsize),
 * without alignment padding or debug metadata.
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->objsize;
}
EXPORT_SYMBOL(kmem_cache_size);
3120
/*
 * Report @text as a slab error for @page and then print every object of the
 * slab whose bit is left clear by get_map(), together with its tracking
 * information.  Compiles to an empty function without CONFIG_SLUB_DEBUG.
 * Silently does nothing if the temporary bitmap cannot be allocated.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	void *obj;
	unsigned long *bitmap;

	/* One bit per object in the slab; GFP_ATOMIC, so this may fail. */
	bitmap = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
			 GFP_ATOMIC);
	if (!bitmap)
		return;

	slab_err(s, page, "%s", text);
	slab_lock(page);

	get_map(s, page, bitmap);
	for_each_object(obj, s, base, page->objects) {
		if (test_bit(slab_index(obj, s, base), bitmap))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
							obj, obj - base);
		print_tracking(s, obj);
	}

	slab_unlock(page);
	kfree(bitmap);
#endif
}
3147
3148
3149
3150
3151
3152
3153static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3154{
3155 struct page *page, *h;
3156
3157 list_for_each_entry_safe(page, h, &n->partial, lru) {
3158 if (!page->inuse) {
3159 remove_partial(n, page);
3160 discard_slab(s, page);
3161 } else {
3162 list_slab_objects(s, page,
3163 "Objects remaining on kmem_cache_close()");
3164 }
3165 }
3166}
3167
3168
3169
3170
3171static inline int kmem_cache_close(struct kmem_cache *s)
3172{
3173 int node;
3174
3175 flush_all(s);
3176 free_percpu(s->cpu_slab);
3177
3178 for_each_node_state(node, N_NORMAL_MEMORY) {
3179 struct kmem_cache_node *n = get_node(s, node);
3180
3181 free_partial(s, n);
3182 if (n->nr_partial || slabs_node(s, node))
3183 return 1;
3184 }
3185 free_kmem_cache_nodes(s);
3186 return 0;
3187}
3188
3189
3190
3191
3192
3193void kmem_cache_destroy(struct kmem_cache *s)
3194{
3195 down_write(&slub_lock);
3196 s->refcount--;
3197 if (!s->refcount) {
3198 list_del(&s->list);
3199 up_write(&slub_lock);
3200 if (kmem_cache_close(s)) {
3201 printk(KERN_ERR "SLUB %s: %s called for cache that "
3202 "still has objects.\n", s->name, __func__);
3203 dump_stack();
3204 }
3205 if (s->flags & SLAB_DESTROY_BY_RCU)
3206 rcu_barrier();
3207 sysfs_slab_remove(s);
3208 } else
3209 up_write(&slub_lock);
3210}
3211EXPORT_SYMBOL(kmem_cache_destroy);
3212
3213
3214
3215
3216
/* Table of kmalloc caches; get_slab() picks the entry for a request size. */
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);

/* Cache from which struct kmem_cache descriptors themselves are allocated. */
static struct kmem_cache *kmem_cache;

#ifdef CONFIG_ZONE_DMA
/* Parallel table used by get_slab() when SLUB_DMA is set in the gfp flags. */
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
3225
/*
 * Handler for the "slub_min_order=" boot parameter: parses an integer from
 * @str into slub_min_order.  The result of get_option() is not checked.
 * Always returns 1.
 */
static int __init setup_slub_min_order(char *str)
{
	get_option(&str, &slub_min_order);

	return 1;
}

__setup("slub_min_order=", setup_slub_min_order);
3234
3235static int __init setup_slub_max_order(char *str)
3236{
3237 get_option(&str, &slub_max_order);
3238 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3239
3240 return 1;
3241}
3242
3243__setup("slub_max_order=", setup_slub_max_order);
3244
/*
 * Handler for the "slub_min_objects=" boot parameter: parses an integer from
 * @str into slub_min_objects.  The result of get_option() is not checked.
 * Always returns 1.
 */
static int __init setup_slub_min_objects(char *str)
{
	get_option(&str, &slub_min_objects);

	return 1;
}

__setup("slub_min_objects=", setup_slub_min_objects);
3253
/*
 * Handler for the "slub_nomerge" boot parameter: sets slub_nomerge, which
 * slab_unmergeable() and find_mergeable() test to refuse cache merging.
 * @str is ignored.  Always returns 1.
 */
static int __init setup_slub_nomerge(char *str)
{
	slub_nomerge = 1;
	return 1;
}

__setup("slub_nomerge", setup_slub_nomerge);
3261
3262static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3263 int size, unsigned int flags)
3264{
3265 struct kmem_cache *s;
3266
3267 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3268
3269
3270
3271
3272
3273 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3274 flags, NULL))
3275 goto panic;
3276
3277 list_add(&s->list, &slab_caches);
3278 return s;
3279
3280panic:
3281 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3282 return NULL;
3283}
3284
3285
3286
3287
3288
3289
3290
/*
 * Lookup table for request sizes 1..192 bytes.  The slot is chosen by
 * size_index_elem(), i.e. one entry per 8-byte step; the value is the index
 * into kmalloc_caches[] that get_slab() uses.  Indexes 1 and 2 are the
 * 96-byte and 192-byte caches created in kmem_cache_init(); the other values
 * are the shift of a power-of-two cache size.  kmem_cache_init() rewrites
 * entries depending on KMALLOC_MIN_SIZE.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};
3317
/*
 * Map a byte count to its slot in size_index[]: sizes 1..8 give 0, 9..16
 * give 1, and so on in 8-byte steps.  @bytes must be non-zero; the
 * subtraction wraps for 0.
 */
static inline int size_index_elem(size_t bytes)
{
	size_t slot = (bytes - 1) >> 3;

	return slot;
}
3322
3323static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3324{
3325 int index;
3326
3327 if (size <= 192) {
3328 if (!size)
3329 return ZERO_SIZE_PTR;
3330
3331 index = size_index[size_index_elem(size)];
3332 } else
3333 index = fls(size - 1);
3334
3335#ifdef CONFIG_ZONE_DMA
3336 if (unlikely((flags & SLUB_DMA)))
3337 return kmalloc_dma_caches[index];
3338
3339#endif
3340 return kmalloc_caches[index];
3341}
3342
3343void *__kmalloc(size_t size, gfp_t flags)
3344{
3345 struct kmem_cache *s;
3346 void *ret;
3347
3348 if (unlikely(size > SLUB_MAX_SIZE))
3349 return kmalloc_large(size, flags);
3350
3351 s = get_slab(size, flags);
3352
3353 if (unlikely(ZERO_OR_NULL_PTR(s)))
3354 return s;
3355
3356 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3357
3358 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3359
3360 return ret;
3361}
3362EXPORT_SYMBOL(__kmalloc);
3363
3364#ifdef CONFIG_NUMA
3365static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3366{
3367 struct page *page;
3368 void *ptr = NULL;
3369
3370 flags |= __GFP_COMP | __GFP_NOTRACK;
3371 page = alloc_pages_node(node, flags, get_order(size));
3372 if (page)
3373 ptr = page_address(page);
3374
3375 kmemleak_alloc(ptr, size, 1, flags);
3376 return ptr;
3377}
3378
3379void *__kmalloc_node(size_t size, gfp_t flags, int node)
3380{
3381 struct kmem_cache *s;
3382 void *ret;
3383
3384 if (unlikely(size > SLUB_MAX_SIZE)) {
3385 ret = kmalloc_large_node(size, flags, node);
3386
3387 trace_kmalloc_node(_RET_IP_, ret,
3388 size, PAGE_SIZE << get_order(size),
3389 flags, node);
3390
3391 return ret;
3392 }
3393
3394 s = get_slab(size, flags);
3395
3396 if (unlikely(ZERO_OR_NULL_PTR(s)))
3397 return s;
3398
3399 ret = slab_alloc(s, flags, node, _RET_IP_);
3400
3401 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3402
3403 return ret;
3404}
3405EXPORT_SYMBOL(__kmalloc_node);
3406#endif
3407
3408size_t ksize(const void *object)
3409{
3410 struct page *page;
3411
3412 if (unlikely(object == ZERO_SIZE_PTR))
3413 return 0;
3414
3415 page = virt_to_head_page(object);
3416
3417 if (unlikely(!PageSlab(page))) {
3418 WARN_ON(!PageCompound(page));
3419 return PAGE_SIZE << compound_order(page);
3420 }
3421
3422 return slab_ksize(page->slab);
3423}
3424EXPORT_SYMBOL(ksize);
3425
3426#ifdef CONFIG_SLUB_DEBUG
3427bool verify_mem_not_deleted(const void *x)
3428{
3429 struct page *page;
3430 void *object = (void *)x;
3431 unsigned long flags;
3432 bool rv;
3433
3434 if (unlikely(ZERO_OR_NULL_PTR(x)))
3435 return false;
3436
3437 local_irq_save(flags);
3438
3439 page = virt_to_head_page(x);
3440 if (unlikely(!PageSlab(page))) {
3441
3442 rv = true;
3443 goto out_unlock;
3444 }
3445
3446 slab_lock(page);
3447 if (on_freelist(page->slab, page, object)) {
3448 object_err(page->slab, page, object, "Object is on free-list");
3449 rv = false;
3450 } else {
3451 rv = true;
3452 }
3453 slab_unlock(page);
3454
3455out_unlock:
3456 local_irq_restore(flags);
3457 return rv;
3458}
3459EXPORT_SYMBOL(verify_mem_not_deleted);
3460#endif
3461
3462void kfree(const void *x)
3463{
3464 struct page *page;
3465 void *object = (void *)x;
3466
3467 trace_kfree(_RET_IP_, x);
3468
3469 if (unlikely(ZERO_OR_NULL_PTR(x)))
3470 return;
3471
3472 page = virt_to_head_page(x);
3473 if (unlikely(!PageSlab(page))) {
3474 BUG_ON(!PageCompound(page));
3475 kmemleak_free(x);
3476 put_page(page);
3477 return;
3478 }
3479 slab_free(page->slab, page, object, _RET_IP_);
3480}
3481EXPORT_SYMBOL(kfree);
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
/*
 * Discard empty partial slabs of a cache and re-sort the remaining partial
 * slabs of every node by the number of objects in use.
 *
 * A temporary array with one list head per possible in-use count is
 * allocated with GFP_KERNEL.  Returns -ENOMEM if that allocation fails and
 * 0 otherwise.
 */
int kmem_cache_shrink(struct kmem_cache *s)
{
	int node;
	int i;
	struct kmem_cache_node *n;
	struct page *page;
	struct page *t;
	int objects = oo_objects(s->max);
	struct list_head *slabs_by_inuse =
		kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
	unsigned long flags;

	if (!slabs_by_inuse)
		return -ENOMEM;

	flush_all(s);
	for_each_node_state(node, N_NORMAL_MEMORY) {
		n = get_node(s, node);

		/* Nothing to sort or free on this node. */
		if (!n->nr_partial)
			continue;

		/* Buckets are re-initialised for every node. */
		for (i = 0; i < objects; i++)
			INIT_LIST_HEAD(slabs_by_inuse + i);

		spin_lock_irqsave(&n->list_lock, flags);

		/*
		 * Move every partial slab into the bucket indexed by its
		 * in-use count.  Slabs in bucket 0 are empty and are no
		 * longer counted as partial.
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
			list_move(&page->lru, slabs_by_inuse + page->inuse);
			if (!page->inuse)
				n->nr_partial--;
		}

		/*
		 * Append the non-empty buckets to the tail of the partial
		 * list, highest in-use count first.  Bucket 0 is left out.
		 */
		for (i = objects - 1; i > 0; i--)
			list_splice(slabs_by_inuse + i, n->partial.prev);

		spin_unlock_irqrestore(&n->list_lock, flags);

		/* Release the empty slabs after the list lock is dropped. */
		list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
			discard_slab(s, page);
	}

	kfree(slabs_by_inuse);
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);
3550
3551#if defined(CONFIG_MEMORY_HOTPLUG)
3552static int slab_mem_going_offline_callback(void *arg)
3553{
3554 struct kmem_cache *s;
3555
3556 down_read(&slub_lock);
3557 list_for_each_entry(s, &slab_caches, list)
3558 kmem_cache_shrink(s);
3559 up_read(&slub_lock);
3560
3561 return 0;
3562}
3563
3564static void slab_mem_offline_callback(void *arg)
3565{
3566 struct kmem_cache_node *n;
3567 struct kmem_cache *s;
3568 struct memory_notify *marg = arg;
3569 int offline_node;
3570
3571 offline_node = marg->status_change_nid;
3572
3573
3574
3575
3576
3577 if (offline_node < 0)
3578 return;
3579
3580 down_read(&slub_lock);
3581 list_for_each_entry(s, &slab_caches, list) {
3582 n = get_node(s, offline_node);
3583 if (n) {
3584
3585
3586
3587
3588
3589
3590 BUG_ON(slabs_node(s, offline_node));
3591
3592 s->node[offline_node] = NULL;
3593 kmem_cache_free(kmem_cache_node, n);
3594 }
3595 }
3596 up_read(&slub_lock);
3597}
3598
3599static int slab_mem_going_online_callback(void *arg)
3600{
3601 struct kmem_cache_node *n;
3602 struct kmem_cache *s;
3603 struct memory_notify *marg = arg;
3604 int nid = marg->status_change_nid;
3605 int ret = 0;
3606
3607
3608
3609
3610
3611 if (nid < 0)
3612 return 0;
3613
3614
3615
3616
3617
3618
3619 down_read(&slub_lock);
3620 list_for_each_entry(s, &slab_caches, list) {
3621
3622
3623
3624
3625
3626 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3627 if (!n) {
3628 ret = -ENOMEM;
3629 goto out;
3630 }
3631 init_kmem_cache_node(n);
3632 s->node[nid] = n;
3633 }
3634out:
3635 up_read(&slub_lock);
3636 return ret;
3637}
3638
3639static int slab_memory_callback(struct notifier_block *self,
3640 unsigned long action, void *arg)
3641{
3642 int ret = 0;
3643
3644 switch (action) {
3645 case MEM_GOING_ONLINE:
3646 ret = slab_mem_going_online_callback(arg);
3647 break;
3648 case MEM_GOING_OFFLINE:
3649 ret = slab_mem_going_offline_callback(arg);
3650 break;
3651 case MEM_OFFLINE:
3652 case MEM_CANCEL_ONLINE:
3653 slab_mem_offline_callback(arg);
3654 break;
3655 case MEM_ONLINE:
3656 case MEM_CANCEL_OFFLINE:
3657 break;
3658 }
3659 if (ret)
3660 ret = notifier_from_errno(ret);
3661 else
3662 ret = NOTIFY_OK;
3663 return ret;
3664}
3665
3666#endif
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3678{
3679 int node;
3680
3681 list_add(&s->list, &slab_caches);
3682 s->refcount = -1;
3683
3684 for_each_node_state(node, N_NORMAL_MEMORY) {
3685 struct kmem_cache_node *n = get_node(s, node);
3686 struct page *p;
3687
3688 if (n) {
3689 list_for_each_entry(p, &n->partial, lru)
3690 p->slab = s;
3691
3692#ifdef CONFIG_SLUB_DEBUG
3693 list_for_each_entry(p, &n->full, lru)
3694 p->slab = s;
3695#endif
3696 }
3697 }
3698}
3699
/*
 * Boot-time initialisation of the allocator.
 *
 * Bootstraps the kmem_cache_node and kmem_cache caches in a temporary area
 * taken from the page allocator, moves both descriptors into objects
 * allocated from kmem_cache itself, creates the kmalloc caches (and, with
 * CONFIG_ZONE_DMA, their DMA counterparts) and prints a summary line.
 */
void __init kmem_cache_init(void)
{
	int i;
	int caches = 0;
	struct kmem_cache *temp_kmem_cache;
	int order;
	struct kmem_cache *temp_kmem_cache_node;
	unsigned long kmalloc_size;

	/* Restrict slabs to order 0 when debug_guardpage_minorder() is set. */
	if (debug_guardpage_minorder())
		slub_max_order = 0;

	/* Descriptor size: fixed part plus one node pointer per node id. */
	kmem_size = offsetof(struct kmem_cache, node) +
				nr_node_ids * sizeof(struct kmem_cache_node *);

	/* Temporary room for two cache-line aligned descriptors. */
	kmalloc_size = ALIGN(kmem_size, cache_line_size());
	order = get_order(2 * kmalloc_size);
	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);

	/*
	 * The kmem_cache_node descriptor lives in the second half of the
	 * temporary area and is opened first.
	 */
	kmem_cache_node = (void *)kmem_cache + kmalloc_size;

	kmem_cache_open(kmem_cache_node, "kmem_cache_node",
		sizeof(struct kmem_cache_node),
		0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);

	/* kmem_cache_node is open: advance the bootstrap state. */
	slab_state = PARTIAL;

	/*
	 * Open kmem_cache in the temporary area, then allocate a descriptor
	 * from it and copy the temporary contents into that object.
	 */
	temp_kmem_cache = kmem_cache;
	kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
		0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
	memcpy(kmem_cache, temp_kmem_cache, kmem_size);

	/*
	 * Same move for kmem_cache_node: its final descriptor is also an
	 * object allocated from kmem_cache.
	 */
	temp_kmem_cache_node = kmem_cache_node;

	kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
	memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);

	/* Link both relocated descriptors in and fix up page->slab. */
	kmem_cache_bootstrap_fixup(kmem_cache_node);

	caches++;
	kmem_cache_bootstrap_fixup(kmem_cache);
	caches++;
	/* The temporary area (both descriptors) is no longer needed. */
	free_pages((unsigned long)temp_kmem_cache, order);

	/* KMALLOC_MIN_SIZE must be a power of two no larger than 256. */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	/* Sizes below KMALLOC_MIN_SIZE are redirected to the smallest cache. */
	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);
		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE == 64) {
		/*
		 * No 96-byte cache is created in this configuration (see
		 * below), so sizes 72..96 are served by index 7.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;
	} else if (KMALLOC_MIN_SIZE == 128) {
		/*
		 * No 192-byte cache is created in this configuration (see
		 * below), so sizes 136..192 are served by index 8.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}

	/* The 96- and 192-byte caches use table slots 1 and 2. */
	if (KMALLOC_MIN_SIZE <= 32) {
		kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
		caches++;
	}

	if (KMALLOC_MIN_SIZE <= 64) {
		kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
		caches++;
	}

	/* Power-of-two caches; they get their real names further down. */
	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
		kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
		caches++;
	}

	slab_state = UP;

	/*
	 * Replace the literal names used so far with dynamically allocated
	 * strings; a failed allocation here is fatal (BUG_ON).
	 */
	if (KMALLOC_MIN_SIZE <= 32) {
		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
		BUG_ON(!kmalloc_caches[1]->name);
	}

	if (KMALLOC_MIN_SIZE <= 64) {
		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
		BUG_ON(!kmalloc_caches[2]->name);
	}

	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
		char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);

		BUG_ON(!s);
		kmalloc_caches[i]->name = s;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&slab_notifier);
#endif

#ifdef CONFIG_ZONE_DMA
	/* One DMA cache for every kmalloc cache that was created above. */
	for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s && s->size) {
			char *name = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", s->objsize);

			BUG_ON(!name);
			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
				s->objsize, SLAB_CACHE_DMA);
		}
	}
#endif
	printk(KERN_INFO
		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
		" CPUs=%d, Nodes=%d\n",
		caches, cache_line_size(),
		slub_min_order, slub_max_order, slub_min_objects,
		nr_cpu_ids, nr_node_ids);
}
3861
/* Late initialisation hook; this allocator has nothing to do here. */
void __init kmem_cache_init_late(void)
{
}
3865
3866
3867
3868
3869static int slab_unmergeable(struct kmem_cache *s)
3870{
3871 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3872 return 1;
3873
3874 if (s->ctor)
3875 return 1;
3876
3877
3878
3879
3880 if (s->refcount < 0)
3881 return 1;
3882
3883 return 0;
3884}
3885
3886static struct kmem_cache *find_mergeable(size_t size,
3887 size_t align, unsigned long flags, const char *name,
3888 void (*ctor)(void *))
3889{
3890 struct kmem_cache *s;
3891
3892 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3893 return NULL;
3894
3895 if (ctor)
3896 return NULL;
3897
3898 size = ALIGN(size, sizeof(void *));
3899 align = calculate_alignment(flags, align, size);
3900 size = ALIGN(size, align);
3901 flags = kmem_cache_flags(size, flags, name, NULL);
3902
3903 list_for_each_entry(s, &slab_caches, list) {
3904 if (slab_unmergeable(s))
3905 continue;
3906
3907 if (size > s->size)
3908 continue;
3909
3910 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3911 continue;
3912
3913
3914
3915
3916 if ((s->size & ~(align - 1)) != s->size)
3917 continue;
3918
3919 if (s->size - size >= sizeof(void *))
3920 continue;
3921
3922 return s;
3923 }
3924 return NULL;
3925}
3926
3927struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3928 size_t align, unsigned long flags, void (*ctor)(void *))
3929{
3930 struct kmem_cache *s;
3931 char *n;
3932
3933 if (WARN_ON(!name))
3934 return NULL;
3935
3936 down_write(&slub_lock);
3937 s = find_mergeable(size, align, flags, name, ctor);
3938 if (s) {
3939 s->refcount++;
3940
3941
3942
3943
3944 s->objsize = max(s->objsize, (int)size);
3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3946
3947 if (sysfs_slab_alias(s, name)) {
3948 s->refcount--;
3949 goto err;
3950 }
3951 up_write(&slub_lock);
3952 return s;
3953 }
3954
3955 n = kstrdup(name, GFP_KERNEL);
3956 if (!n)
3957 goto err;
3958
3959 s = kmalloc(kmem_size, GFP_KERNEL);
3960 if (s) {
3961 if (kmem_cache_open(s, n,
3962 size, align, flags, ctor)) {
3963 list_add(&s->list, &slab_caches);
3964 up_write(&slub_lock);
3965 if (sysfs_slab_add(s)) {
3966 down_write(&slub_lock);
3967 list_del(&s->list);
3968 kfree(n);
3969 kfree(s);
3970 goto err;
3971 }
3972 return s;
3973 }
3974 kfree(s);
3975 }
3976 kfree(n);
3977err:
3978 up_write(&slub_lock);
3979
3980 if (flags & SLAB_PANIC)
3981 panic("Cannot create slabcache %s\n", name);
3982 else
3983 s = NULL;
3984 return s;
3985}
3986EXPORT_SYMBOL(kmem_cache_create);
3987
3988#ifdef CONFIG_SMP
3989
3990
3991
3992
3993static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3994 unsigned long action, void *hcpu)
3995{
3996 long cpu = (long)hcpu;
3997 struct kmem_cache *s;
3998 unsigned long flags;
3999
4000 switch (action) {
4001 case CPU_UP_CANCELED:
4002 case CPU_UP_CANCELED_FROZEN:
4003 case CPU_DEAD:
4004 case CPU_DEAD_FROZEN:
4005 down_read(&slub_lock);
4006 list_for_each_entry(s, &slab_caches, list) {
4007 local_irq_save(flags);
4008 __flush_cpu_slab(s, cpu);
4009 local_irq_restore(flags);
4010 }
4011 up_read(&slub_lock);
4012 break;
4013 default:
4014 break;
4015 }
4016 return NOTIFY_OK;
4017}
4018
/* CPU notifier block; kmem_cache_init() registers it under CONFIG_SMP. */
static struct notifier_block __cpuinitdata slab_notifier = {
	.notifier_call = slab_cpuup_callback
};
4022
4023#endif
4024
4025void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4026{
4027 struct kmem_cache *s;
4028 void *ret;
4029
4030 if (unlikely(size > SLUB_MAX_SIZE))
4031 return kmalloc_large(size, gfpflags);
4032
4033 s = get_slab(size, gfpflags);
4034
4035 if (unlikely(ZERO_OR_NULL_PTR(s)))
4036 return s;
4037
4038 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
4039
4040
4041 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4042
4043 return ret;
4044}
4045
4046#ifdef CONFIG_NUMA
4047void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4048 int node, unsigned long caller)
4049{
4050 struct kmem_cache *s;
4051 void *ret;
4052
4053 if (unlikely(size > SLUB_MAX_SIZE)) {
4054 ret = kmalloc_large_node(size, gfpflags, node);
4055
4056 trace_kmalloc_node(caller, ret,
4057 size, PAGE_SIZE << get_order(size),
4058 gfpflags, node);
4059
4060 return ret;
4061 }
4062
4063 s = get_slab(size, gfpflags);
4064
4065 if (unlikely(ZERO_OR_NULL_PTR(s)))
4066 return s;
4067
4068 ret = slab_alloc(s, gfpflags, node, caller);
4069
4070
4071 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4072
4073 return ret;
4074}
4075#endif
4076
4077#ifdef CONFIG_SYSFS
/* count_partial() callback: number of objects in use in @page. */
static int count_inuse(struct page *page)
{
	return page->inuse;
}
4082
/* count_partial() callback: total number of objects in @page. */
static int count_total(struct page *page)
{
	return page->objects;
}
4087#endif
4088
4089#ifdef CONFIG_SLUB_DEBUG
4090static int validate_slab(struct kmem_cache *s, struct page *page,
4091 unsigned long *map)
4092{
4093 void *p;
4094 void *addr = page_address(page);
4095
4096 if (!check_slab(s, page) ||
4097 !on_freelist(s, page, NULL))
4098 return 0;
4099
4100
4101 bitmap_zero(map, page->objects);
4102
4103 get_map(s, page, map);
4104 for_each_object(p, s, addr, page->objects) {
4105 if (test_bit(slab_index(p, s, addr), map))
4106 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4107 return 0;
4108 }
4109
4110 for_each_object(p, s, addr, page->objects)
4111 if (!test_bit(slab_index(p, s, addr), map))
4112 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4113 return 0;
4114 return 1;
4115}
4116
/*
 * Run validate_slab() on @page with the slab locked.  The validation result
 * is discarded.
 */
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	slab_lock(page);
	validate_slab(s, page, map);
	slab_unlock(page);
}
4124
4125static int validate_slab_node(struct kmem_cache *s,
4126 struct kmem_cache_node *n, unsigned long *map)
4127{
4128 unsigned long count = 0;
4129 struct page *page;
4130 unsigned long flags;
4131
4132 spin_lock_irqsave(&n->list_lock, flags);
4133
4134 list_for_each_entry(page, &n->partial, lru) {
4135 validate_slab_slab(s, page, map);
4136 count++;
4137 }
4138 if (count != n->nr_partial)
4139 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4140 "counter=%ld\n", s->name, count, n->nr_partial);
4141
4142 if (!(s->flags & SLAB_STORE_USER))
4143 goto out;
4144
4145 list_for_each_entry(page, &n->full, lru) {
4146 validate_slab_slab(s, page, map);
4147 count++;
4148 }
4149 if (count != atomic_long_read(&n->nr_slabs))
4150 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4151 "counter=%ld\n", s->name, count,
4152 atomic_long_read(&n->nr_slabs));
4153
4154out:
4155 spin_unlock_irqrestore(&n->list_lock, flags);
4156 return count;
4157}
4158
4159static long validate_slab_cache(struct kmem_cache *s)
4160{
4161 int node;
4162 unsigned long count = 0;
4163 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4164 sizeof(unsigned long), GFP_KERNEL);
4165
4166 if (!map)
4167 return -ENOMEM;
4168
4169 flush_all(s);
4170 for_each_node_state(node, N_NORMAL_MEMORY) {
4171 struct kmem_cache_node *n = get_node(s, node);
4172
4173 count += validate_slab_node(s, n, map);
4174 }
4175 kfree(map);
4176 return count;
4177}
4178
4179
4180
4181
4182
/*
 * Aggregated tracking data for one call site, filled in by add_location()
 * and printed by list_locations().
 */
struct location {
	unsigned long count;	/* number of tracks merged into this entry */
	unsigned long addr;	/* call site address; entries are sorted by it */
	long long sum_time;	/* sum of ages (jiffies - track->when) */
	long min_time;		/* smallest age seen */
	long max_time;		/* largest age seen */
	long min_pid;		/* lowest pid seen */
	long max_pid;		/* highest pid seen */
	DECLARE_BITMAP(cpus, NR_CPUS);	/* cpus recorded in the tracks */
	nodemask_t nodes;	/* nodes of the pages holding the tracks */
};
4194
/* Growable array of struct location kept sorted by address. */
struct loc_track {
	unsigned long max;	/* capacity of loc[] in entries */
	unsigned long count;	/* entries currently in use */
	struct location *loc;	/* storage obtained from the page allocator */
};
4200
4201static void free_loc_track(struct loc_track *t)
4202{
4203 if (t->max)
4204 free_pages((unsigned long)t->loc,
4205 get_order(sizeof(struct location) * t->max));
4206}
4207
4208static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4209{
4210 struct location *l;
4211 int order;
4212
4213 order = get_order(sizeof(struct location) * max);
4214
4215 l = (void *)__get_free_pages(flags, order);
4216 if (!l)
4217 return 0;
4218
4219 if (t->count) {
4220 memcpy(l, t->loc, sizeof(struct location) * t->count);
4221 free_loc_track(t);
4222 }
4223 t->max = max;
4224 t->loc = l;
4225 return 1;
4226}
4227
/*
 * Account one track record in the location table.
 *
 * The table is kept sorted by address.  A binary search either finds the
 * existing entry for track->addr, which is then updated, or determines the
 * position at which a new entry is inserted, growing the table if needed.
 *
 * Returns 1 on success and 0 if the table is full and cannot be grown.
 */
static int add_location(struct loc_track *t, struct kmem_cache *s,
				const struct track *track)
{
	long start, end, pos;
	struct location *l;
	unsigned long caddr;
	unsigned long age = jiffies - track->when;

	/* Search the open interval (start, end). */
	start = -1;
	end = t->count;

	for ( ; ; ) {
		pos = start + (end - start + 1) / 2;

		/*
		 * The interval is empty once pos reaches end; pos is then
		 * the insertion point.
		 */
		if (pos == end)
			break;

		caddr = t->loc[pos].addr;
		if (track->addr == caddr) {

			/* Known call site: fold this track into its entry. */
			l = &t->loc[pos];
			l->count++;
			if (track->when) {
				l->sum_time += age;
				if (age < l->min_time)
					l->min_time = age;
				if (age > l->max_time)
					l->max_time = age;

				if (track->pid < l->min_pid)
					l->min_pid = track->pid;
				if (track->pid > l->max_pid)
					l->max_pid = track->pid;

				cpumask_set_cpu(track->cpu,
						to_cpumask(l->cpus));
			}
			node_set(page_to_nid(virt_to_page(track)), l->nodes);
			return 1;
		}

		if (track->addr < caddr)
			end = pos;
		else
			start = pos;
	}

	/*
	 * Not found: a new entry is needed.  Double the table first if it is
	 * full; GFP_ATOMIC is used for the allocation.
	 */
	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
		return 0;

	/* Shift the tail up by one to open a slot at pos. */
	l = t->loc + pos;
	if (pos < t->count)
		memmove(l + 1, l,
			(t->count - pos) * sizeof(struct location));
	t->count++;
	l->count = 1;
	l->addr = track->addr;
	l->sum_time = age;
	l->min_time = age;
	l->max_time = age;
	l->min_pid = track->pid;
	l->max_pid = track->pid;
	cpumask_clear(to_cpumask(l->cpus));
	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
	nodes_clear(l->nodes);
	node_set(page_to_nid(virt_to_page(track)), l->nodes);
	return 1;
}
4303
4304static void process_slab(struct loc_track *t, struct kmem_cache *s,
4305 struct page *page, enum track_item alloc,
4306 unsigned long *map)
4307{
4308 void *addr = page_address(page);
4309 void *p;
4310
4311 bitmap_zero(map, page->objects);
4312 get_map(s, page, map);
4313
4314 for_each_object(p, s, addr, page->objects)
4315 if (!test_bit(slab_index(p, s, addr), map))
4316 add_location(t, s, get_track(s, p, alloc));
4317}
4318
/*
 * Format a per-call-site summary of the @alloc tracks of all objects in the
 * cache into @buf.
 *
 * The partial and full lists of every node that has slabs are scanned under
 * the node's list lock, then one line per location is printed.  Output
 * stops early once @buf approaches PAGE_SIZE.  Returns the number of bytes
 * written; on allocation failure "Out of memory\n" is written instead.
 */
static int list_locations(struct kmem_cache *s, char *buf,
					enum track_item alloc)
{
	int len = 0;
	unsigned long i;
	struct loc_track t = { 0, 0, NULL };
	int node;
	unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
				     sizeof(unsigned long), GFP_KERNEL);

	/* The initial location table is sized to fit within one page. */
	if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
				     GFP_TEMPORARY)) {
		kfree(map);
		return sprintf(buf, "Out of memory\n");
	}

	flush_all(s);

	for_each_node_state(node, N_NORMAL_MEMORY) {
		struct kmem_cache_node *n = get_node(s, node);
		unsigned long flags;
		struct page *page;

		if (!atomic_long_read(&n->nr_slabs))
			continue;

		spin_lock_irqsave(&n->list_lock, flags);
		list_for_each_entry(page, &n->partial, lru)
			process_slab(&t, s, page, alloc, map);
		list_for_each_entry(page, &n->full, lru)
			process_slab(&t, s, page, alloc, map);
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	for (i = 0; i < t.count; i++) {
		struct location *l = &t.loc[i];

		/* Stop while there is still room for one more full line. */
		if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
			break;
		len += sprintf(buf + len, "%7ld ", l->count);

		if (l->addr)
			len += sprintf(buf + len, "%pS", (void *)l->addr);
		else
			len += sprintf(buf + len, "<not-available>");

		/* Print min/avg/max unless the sum equals the minimum. */
		if (l->sum_time != l->min_time) {
			len += sprintf(buf + len, " age=%ld/%ld/%ld",
				l->min_time,
				(long)div_u64(l->sum_time, l->count),
				l->max_time);
		} else
			len += sprintf(buf + len, " age=%ld",
				l->min_time);

		if (l->min_pid != l->max_pid)
			len += sprintf(buf + len, " pid=%ld-%ld",
				l->min_pid, l->max_pid);
		else
			len += sprintf(buf + len, " pid=%ld",
				l->min_pid);

		/* cpu and node lists are only added while space remains. */
		if (num_online_cpus() > 1 &&
				!cpumask_empty(to_cpumask(l->cpus)) &&
				len < PAGE_SIZE - 60) {
			len += sprintf(buf + len, " cpus=");
			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
						 to_cpumask(l->cpus));
		}

		if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
				len < PAGE_SIZE - 60) {
			len += sprintf(buf + len, " nodes=");
			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
					l->nodes);
		}

		len += sprintf(buf + len, "\n");
	}

	free_loc_track(&t);
	kfree(map);
	/* t.count is a plain member, still readable after the free above. */
	if (!t.count)
		len += sprintf(buf, "No data\n");
	return len;
}
4405#endif
4406
#ifdef SLUB_RESILIENCY_TEST
/*
 * Self-test that corrupts kmalloc memory on purpose and then runs
 * validate_slab_cache() on the affected kmalloc cache.  Only built when
 * SLUB_RESILIENCY_TEST is defined.  The allocations made in part A are
 * never freed.
 */
static void resiliency_test(void)
{
	u8 *p;

	/* The test indexes kmalloc_caches[4..9] directly. */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	/* Write one byte past the end of a 16-byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches[4]);

	/* Write further beyond the end of a 32-byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches[5]);
	/* Write at a cycle-counter-dependent offset past a 64-byte object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches[6]);

	/* Part B: write to objects after they have been freed. */
	printk(KERN_ERR "\nB. Corruption after free\n");
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[7]);

	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
	validate_slab_cache(kmalloc_caches[8]);

	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[9]);
}
#else
#ifdef CONFIG_SYSFS
/* Empty stand-in used when the self-test is compiled out. */
static void resiliency_test(void) {};
#endif
#endif
4468
4469#ifdef CONFIG_SYSFS
/* Bit positions for the selection flags taken by show_slab_objects(). */
enum slab_stat_type {
	SL_ALL,			/* walk all slabs of every node */
	SL_PARTIAL,		/* walk the per-node partial lists */
	SL_CPU,			/* walk the per-cpu slabs */
	SL_OBJECTS,		/* report objects in use instead of slabs */
	SL_TOTAL		/* report total objects instead of slabs */
};

/* Flag masks built from the bit positions above. */
#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
4483
/*
 * Format slab or object counts of a cache into @buf.
 *
 * @flags is a combination of the SO_* masks selecting which slabs are
 * visited (per-cpu, all, partial) and what is counted (slabs, objects in
 * use, total objects).  The output is the grand total followed, with
 * CONFIG_NUMA, by " N<node>=<count>" for every node with a non-zero count,
 * and a newline.
 *
 * Returns the number of bytes written, or -ENOMEM if the temporary counter
 * array cannot be allocated.
 */
static ssize_t show_slab_objects(struct kmem_cache *s,
			    char *buf, unsigned long flags)
{
	unsigned long total = 0;
	int node;
	int x;
	unsigned long *nodes;
	unsigned long *per_cpu;

	/* One allocation holds both arrays: nodes[] first, per_cpu[] after. */
	nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;
	per_cpu = nodes + nr_node_ids;

	if (flags & SO_CPU) {
		int cpu;

		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
			int node = ACCESS_ONCE(c->node);
			struct page *page;

			/* A negative node means this cpu is skipped. */
			if (node < 0)
				continue;
			page = ACCESS_ONCE(c->page);
			if (page) {
				if (flags & SO_TOTAL)
					x = page->objects;
				else if (flags & SO_OBJECTS)
					x = page->inuse;
				else
					x = 1;

				total += x;
				nodes[node] += x;
			}
			/* The per-cpu partial list is counted via pobjects. */
			page = c->partial;

			if (page) {
				x = page->pobjects;
				total += x;
				nodes[node] += x;
			}
			/* per_cpu[] is updated but not read in this function. */
			per_cpu[node]++;
		}
	}

	lock_memory_hotplug();
#ifdef CONFIG_SLUB_DEBUG
	if (flags & SO_ALL) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n = get_node(s, node);

			if (flags & SO_TOTAL)
				x = atomic_long_read(&n->total_objects);
			else if (flags & SO_OBJECTS)
				x = atomic_long_read(&n->total_objects) -
					count_partial(n, count_free);

			else
				x = atomic_long_read(&n->nr_slabs);
			total += x;
			nodes[node] += x;
		}

	} else
#endif
	/* SO_PARTIAL is only considered when the SO_ALL branch was not taken. */
	if (flags & SO_PARTIAL) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n = get_node(s, node);

			if (flags & SO_TOTAL)
				x = count_partial(n, count_total);
			else if (flags & SO_OBJECTS)
				x = count_partial(n, count_inuse);
			else
				x = n->nr_partial;
			total += x;
			nodes[node] += x;
		}
	}
	/* From here on x is the number of bytes written to buf. */
	x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
	for_each_node_state(node, N_NORMAL_MEMORY)
		if (nodes[node])
			x += sprintf(buf + x, " N%d=%lu",
					node, nodes[node]);
#endif
	unlock_memory_hotplug();
	kfree(nodes);
	return x + sprintf(buf + x, "\n");
}
4576
4577#ifdef CONFIG_SLUB_DEBUG
4578static int any_slab_objects(struct kmem_cache *s)
4579{
4580 int node;
4581
4582 for_each_online_node(node) {
4583 struct kmem_cache_node *n = get_node(s, node);
4584
4585 if (!n)
4586 continue;
4587
4588 if (atomic_long_read(&n->total_objects))
4589 return 1;
4590 }
4591 return 0;
4592}
4593#endif
4594
4595#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4596#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4597
4598struct slab_attribute {
4599 struct attribute attr;
4600 ssize_t (*show)(struct kmem_cache *s, char *buf);
4601 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4602};
4603
4604#define SLAB_ATTR_RO(_name) \
4605 static struct slab_attribute _name##_attr = \
4606 __ATTR(_name, 0400, _name##_show, NULL)
4607
4608#define SLAB_ATTR(_name) \
4609 static struct slab_attribute _name##_attr = \
4610 __ATTR(_name, 0600, _name##_show, _name##_store)
4611
4612static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4613{
4614 return sprintf(buf, "%d\n", s->size);
4615}
4616SLAB_ATTR_RO(slab_size);
4617
4618static ssize_t align_show(struct kmem_cache *s, char *buf)
4619{
4620 return sprintf(buf, "%d\n", s->align);
4621}
4622SLAB_ATTR_RO(align);
4623
4624static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4625{
4626 return sprintf(buf, "%d\n", s->objsize);
4627}
4628SLAB_ATTR_RO(object_size);
4629
4630static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4631{
4632 return sprintf(buf, "%d\n", oo_objects(s->oo));
4633}
4634SLAB_ATTR_RO(objs_per_slab);
4635
4636static ssize_t order_store(struct kmem_cache *s,
4637 const char *buf, size_t length)
4638{
4639 unsigned long order;
4640 int err;
4641
4642 err = strict_strtoul(buf, 10, &order);
4643 if (err)
4644 return err;
4645
4646 if (order > slub_max_order || order < slub_min_order)
4647 return -EINVAL;
4648
4649 calculate_sizes(s, order);
4650 return length;
4651}
4652
4653static ssize_t order_show(struct kmem_cache *s, char *buf)
4654{
4655 return sprintf(buf, "%d\n", oo_order(s->oo));
4656}
4657SLAB_ATTR(order);
4658
4659static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4660{
4661 return sprintf(buf, "%lu\n", s->min_partial);
4662}
4663
4664static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4665 size_t length)
4666{
4667 unsigned long min;
4668 int err;
4669
4670 err = strict_strtoul(buf, 10, &min);
4671 if (err)
4672 return err;
4673
4674 set_min_partial(s, min);
4675 return length;
4676}
4677SLAB_ATTR(min_partial);
4678
4679static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4680{
4681 return sprintf(buf, "%u\n", s->cpu_partial);
4682}
4683
4684static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4685 size_t length)
4686{
4687 unsigned long objects;
4688 int err;
4689
4690 err = strict_strtoul(buf, 10, &objects);
4691 if (err)
4692 return err;
4693 if (objects && kmem_cache_debug(s))
4694 return -EINVAL;
4695
4696 s->cpu_partial = objects;
4697 flush_all(s);
4698 return length;
4699}
4700SLAB_ATTR(cpu_partial);
4701
4702static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4703{
4704 if (!s->ctor)
4705 return 0;
4706 return sprintf(buf, "%pS\n", s->ctor);
4707}
4708SLAB_ATTR_RO(ctor);
4709
4710static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4711{
4712 return sprintf(buf, "%d\n", s->refcount - 1);
4713}
4714SLAB_ATTR_RO(aliases);
4715
4716static ssize_t partial_show(struct kmem_cache *s, char *buf)
4717{
4718 return show_slab_objects(s, buf, SO_PARTIAL);
4719}
4720SLAB_ATTR_RO(partial);
4721
4722static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4723{
4724 return show_slab_objects(s, buf, SO_CPU);
4725}
4726SLAB_ATTR_RO(cpu_slabs);
4727
4728static ssize_t objects_show(struct kmem_cache *s, char *buf)
4729{
4730 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4731}
4732SLAB_ATTR_RO(objects);
4733
4734static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4735{
4736 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4737}
4738SLAB_ATTR_RO(objects_partial);
4739
4740static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4741{
4742 int objects = 0;
4743 int pages = 0;
4744 int cpu;
4745 int len;
4746
4747 for_each_online_cpu(cpu) {
4748 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4749
4750 if (page) {
4751 pages += page->pages;
4752 objects += page->pobjects;
4753 }
4754 }
4755
4756 len = sprintf(buf, "%d(%d)", objects, pages);
4757
4758#ifdef CONFIG_SMP
4759 for_each_online_cpu(cpu) {
4760 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4761
4762 if (page && len < PAGE_SIZE - 20)
4763 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4764 page->pobjects, page->pages);
4765 }
4766#endif
4767 return len + sprintf(buf + len, "\n");
4768}
4769SLAB_ATTR_RO(slabs_cpu_partial);
4770
4771static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4772{
4773 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4774}
4775
4776static ssize_t reclaim_account_store(struct kmem_cache *s,
4777 const char *buf, size_t length)
4778{
4779 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4780 if (buf[0] == '1')
4781 s->flags |= SLAB_RECLAIM_ACCOUNT;
4782 return length;
4783}
4784SLAB_ATTR(reclaim_account);
4785
4786static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4787{
4788 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4789}
4790SLAB_ATTR_RO(hwcache_align);
4791
4792#ifdef CONFIG_ZONE_DMA
4793static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4794{
4795 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4796}
4797SLAB_ATTR_RO(cache_dma);
4798#endif
4799
4800static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4801{
4802 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4803}
4804SLAB_ATTR_RO(destroy_by_rcu);
4805
4806static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4807{
4808 return sprintf(buf, "%d\n", s->reserved);
4809}
4810SLAB_ATTR_RO(reserved);
4811
4812#ifdef CONFIG_SLUB_DEBUG
4813static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4814{
4815 return show_slab_objects(s, buf, SO_ALL);
4816}
4817SLAB_ATTR_RO(slabs);
4818
4819static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4820{
4821 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4822}
4823SLAB_ATTR_RO(total_objects);
4824
4825static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4826{
4827 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4828}
4829
4830static ssize_t sanity_checks_store(struct kmem_cache *s,
4831 const char *buf, size_t length)
4832{
4833 s->flags &= ~SLAB_DEBUG_FREE;
4834 if (buf[0] == '1') {
4835 s->flags &= ~__CMPXCHG_DOUBLE;
4836 s->flags |= SLAB_DEBUG_FREE;
4837 }
4838 return length;
4839}
4840SLAB_ATTR(sanity_checks);
4841
4842static ssize_t trace_show(struct kmem_cache *s, char *buf)
4843{
4844 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4845}
4846
4847static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4848 size_t length)
4849{
4850 s->flags &= ~SLAB_TRACE;
4851 if (buf[0] == '1') {
4852 s->flags &= ~__CMPXCHG_DOUBLE;
4853 s->flags |= SLAB_TRACE;
4854 }
4855 return length;
4856}
4857SLAB_ATTR(trace);
4858
4859static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4860{
4861 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4862}
4863
4864static ssize_t red_zone_store(struct kmem_cache *s,
4865 const char *buf, size_t length)
4866{
4867 if (any_slab_objects(s))
4868 return -EBUSY;
4869
4870 s->flags &= ~SLAB_RED_ZONE;
4871 if (buf[0] == '1') {
4872 s->flags &= ~__CMPXCHG_DOUBLE;
4873 s->flags |= SLAB_RED_ZONE;
4874 }
4875 calculate_sizes(s, -1);
4876 return length;
4877}
4878SLAB_ATTR(red_zone);
4879
4880static ssize_t poison_show(struct kmem_cache *s, char *buf)
4881{
4882 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4883}
4884
4885static ssize_t poison_store(struct kmem_cache *s,
4886 const char *buf, size_t length)
4887{
4888 if (any_slab_objects(s))
4889 return -EBUSY;
4890
4891 s->flags &= ~SLAB_POISON;
4892 if (buf[0] == '1') {
4893 s->flags &= ~__CMPXCHG_DOUBLE;
4894 s->flags |= SLAB_POISON;
4895 }
4896 calculate_sizes(s, -1);
4897 return length;
4898}
4899SLAB_ATTR(poison);
4900
4901static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4902{
4903 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4904}
4905
4906static ssize_t store_user_store(struct kmem_cache *s,
4907 const char *buf, size_t length)
4908{
4909 if (any_slab_objects(s))
4910 return -EBUSY;
4911
4912 s->flags &= ~SLAB_STORE_USER;
4913 if (buf[0] == '1') {
4914 s->flags &= ~__CMPXCHG_DOUBLE;
4915 s->flags |= SLAB_STORE_USER;
4916 }
4917 calculate_sizes(s, -1);
4918 return length;
4919}
4920SLAB_ATTR(store_user);
4921
4922static ssize_t validate_show(struct kmem_cache *s, char *buf)
4923{
4924 return 0;
4925}
4926
4927static ssize_t validate_store(struct kmem_cache *s,
4928 const char *buf, size_t length)
4929{
4930 int ret = -EINVAL;
4931
4932 if (buf[0] == '1') {
4933 ret = validate_slab_cache(s);
4934 if (ret >= 0)
4935 ret = length;
4936 }
4937 return ret;
4938}
4939SLAB_ATTR(validate);
4940
4941static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4942{
4943 if (!(s->flags & SLAB_STORE_USER))
4944 return -ENOSYS;
4945 return list_locations(s, buf, TRACK_ALLOC);
4946}
4947SLAB_ATTR_RO(alloc_calls);
4948
4949static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4950{
4951 if (!(s->flags & SLAB_STORE_USER))
4952 return -ENOSYS;
4953 return list_locations(s, buf, TRACK_FREE);
4954}
4955SLAB_ATTR_RO(free_calls);
4956#endif
4957
4958#ifdef CONFIG_FAILSLAB
4959static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4960{
4961 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4962}
4963
4964static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4965 size_t length)
4966{
4967 s->flags &= ~SLAB_FAILSLAB;
4968 if (buf[0] == '1')
4969 s->flags |= SLAB_FAILSLAB;
4970 return length;
4971}
4972SLAB_ATTR(failslab);
4973#endif
4974
4975static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4976{
4977 return 0;
4978}
4979
4980static ssize_t shrink_store(struct kmem_cache *s,
4981 const char *buf, size_t length)
4982{
4983 if (buf[0] == '1') {
4984 int rc = kmem_cache_shrink(s);
4985
4986 if (rc)
4987 return rc;
4988 } else
4989 return -EINVAL;
4990 return length;
4991}
4992SLAB_ATTR(shrink);
4993
4994#ifdef CONFIG_NUMA
4995static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4996{
4997 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4998}
4999
5000static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5001 const char *buf, size_t length)
5002{
5003 unsigned long ratio;
5004 int err;
5005
5006 err = strict_strtoul(buf, 10, &ratio);
5007 if (err)
5008 return err;
5009
5010 if (ratio <= 100)
5011 s->remote_node_defrag_ratio = ratio * 10;
5012
5013 return length;
5014}
5015SLAB_ATTR(remote_node_defrag_ratio);
5016#endif
5017
5018#ifdef CONFIG_SLUB_STATS
5019static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5020{
5021 unsigned long sum = 0;
5022 int cpu;
5023 int len;
5024 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
5025
5026 if (!data)
5027 return -ENOMEM;
5028
5029 for_each_online_cpu(cpu) {
5030 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5031
5032 data[cpu] = x;
5033 sum += x;
5034 }
5035
5036 len = sprintf(buf, "%lu", sum);
5037
5038#ifdef CONFIG_SMP
5039 for_each_online_cpu(cpu) {
5040 if (data[cpu] && len < PAGE_SIZE - 20)
5041 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5042 }
5043#endif
5044 kfree(data);
5045 return len + sprintf(buf + len, "\n");
5046}
5047
5048static void clear_stat(struct kmem_cache *s, enum stat_item si)
5049{
5050 int cpu;
5051
5052 for_each_online_cpu(cpu)
5053 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5054}
5055
/*
 * Generate the sysfs attribute <text> for statistics counter <si>.
 * Reading shows the counter via show_stat(); writing text that starts
 * with '0' clears it, any other input is rejected with -EINVAL.
 */
#define STAT_ATTR(si, text)					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] == '0') {					\
		clear_stat(s, si);				\
		return length;					\
	}							\
	return -EINVAL;						\
}								\
SLAB_ATTR(text);
5070
/* One read/clear attribute per statistics counter */

/* Allocation and free paths */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);

/* Cpu slab flushing and deactivation */
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);

/* Order fallback and cmpxchg_double failures */
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);

/* Per cpu partial lists */
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5097#endif
5098
5099static struct attribute *slab_attrs[] = {
5100 &slab_size_attr.attr,
5101 &object_size_attr.attr,
5102 &objs_per_slab_attr.attr,
5103 &order_attr.attr,
5104 &min_partial_attr.attr,
5105 &cpu_partial_attr.attr,
5106 &objects_attr.attr,
5107 &objects_partial_attr.attr,
5108 &partial_attr.attr,
5109 &cpu_slabs_attr.attr,
5110 &ctor_attr.attr,
5111 &aliases_attr.attr,
5112 &align_attr.attr,
5113 &hwcache_align_attr.attr,
5114 &reclaim_account_attr.attr,
5115 &destroy_by_rcu_attr.attr,
5116 &shrink_attr.attr,
5117 &reserved_attr.attr,
5118 &slabs_cpu_partial_attr.attr,
5119#ifdef CONFIG_SLUB_DEBUG
5120 &total_objects_attr.attr,
5121 &slabs_attr.attr,
5122 &sanity_checks_attr.attr,
5123 &trace_attr.attr,
5124 &red_zone_attr.attr,
5125 &poison_attr.attr,
5126 &store_user_attr.attr,
5127 &validate_attr.attr,
5128 &alloc_calls_attr.attr,
5129 &free_calls_attr.attr,
5130#endif
5131#ifdef CONFIG_ZONE_DMA
5132 &cache_dma_attr.attr,
5133#endif
5134#ifdef CONFIG_NUMA
5135 &remote_node_defrag_ratio_attr.attr,
5136#endif
5137#ifdef CONFIG_SLUB_STATS
5138 &alloc_fastpath_attr.attr,
5139 &alloc_slowpath_attr.attr,
5140 &free_fastpath_attr.attr,
5141 &free_slowpath_attr.attr,
5142 &free_frozen_attr.attr,
5143 &free_add_partial_attr.attr,
5144 &free_remove_partial_attr.attr,
5145 &alloc_from_partial_attr.attr,
5146 &alloc_slab_attr.attr,
5147 &alloc_refill_attr.attr,
5148 &alloc_node_mismatch_attr.attr,
5149 &free_slab_attr.attr,
5150 &cpuslab_flush_attr.attr,
5151 &deactivate_full_attr.attr,
5152 &deactivate_empty_attr.attr,
5153 &deactivate_to_head_attr.attr,
5154 &deactivate_to_tail_attr.attr,
5155 &deactivate_remote_frees_attr.attr,
5156 &deactivate_bypass_attr.attr,
5157 &order_fallback_attr.attr,
5158 &cmpxchg_double_fail_attr.attr,
5159 &cmpxchg_double_cpu_fail_attr.attr,
5160 &cpu_partial_alloc_attr.attr,
5161 &cpu_partial_free_attr.attr,
5162 &cpu_partial_node_attr.attr,
5163 &cpu_partial_drain_attr.attr,
5164#endif
5165#ifdef CONFIG_FAILSLAB
5166 &failslab_attr.attr,
5167#endif
5168
5169 NULL
5170};
5171
5172static struct attribute_group slab_attr_group = {
5173 .attrs = slab_attrs,
5174};
5175
5176static ssize_t slab_attr_show(struct kobject *kobj,
5177 struct attribute *attr,
5178 char *buf)
5179{
5180 struct slab_attribute *attribute;
5181 struct kmem_cache *s;
5182 int err;
5183
5184 attribute = to_slab_attr(attr);
5185 s = to_slab(kobj);
5186
5187 if (!attribute->show)
5188 return -EIO;
5189
5190 err = attribute->show(s, buf);
5191
5192 return err;
5193}
5194
5195static ssize_t slab_attr_store(struct kobject *kobj,
5196 struct attribute *attr,
5197 const char *buf, size_t len)
5198{
5199 struct slab_attribute *attribute;
5200 struct kmem_cache *s;
5201 int err;
5202
5203 attribute = to_slab_attr(attr);
5204 s = to_slab(kobj);
5205
5206 if (!attribute->store)
5207 return -EIO;
5208
5209 err = attribute->store(s, buf, len);
5210
5211 return err;
5212}
5213
5214static void kmem_cache_release(struct kobject *kobj)
5215{
5216 struct kmem_cache *s = to_slab(kobj);
5217
5218 kfree(s->name);
5219 kfree(s);
5220}
5221
5222static const struct sysfs_ops slab_sysfs_ops = {
5223 .show = slab_attr_show,
5224 .store = slab_attr_store,
5225};
5226
5227static struct kobj_type slab_ktype = {
5228 .sysfs_ops = &slab_sysfs_ops,
5229 .release = kmem_cache_release
5230};
5231
5232static int uevent_filter(struct kset *kset, struct kobject *kobj)
5233{
5234 struct kobj_type *ktype = get_ktype(kobj);
5235
5236 if (ktype == &slab_ktype)
5237 return 1;
5238 return 0;
5239}
5240
5241static const struct kset_uevent_ops slab_uevent_ops = {
5242 .filter = uevent_filter,
5243};
5244
/* The "slab" kset, created by slab_sysfs_init() */
static struct kset *slab_kset;

/* Size of the buffer that create_unique_id() allocates for an id */
#define ID_STR_LENGTH 64
5248
5249
5250
5251
5252
5253static char *create_unique_id(struct kmem_cache *s)
5254{
5255 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5256 char *p = name;
5257
5258 BUG_ON(!name);
5259
5260 *p++ = ':';
5261
5262
5263
5264
5265
5266
5267
5268 if (s->flags & SLAB_CACHE_DMA)
5269 *p++ = 'd';
5270 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5271 *p++ = 'a';
5272 if (s->flags & SLAB_DEBUG_FREE)
5273 *p++ = 'F';
5274 if (!(s->flags & SLAB_NOTRACK))
5275 *p++ = 't';
5276 if (p != name + 1)
5277 *p++ = '-';
5278 p += sprintf(p, "%07d", s->size);
5279 BUG_ON(p > name + ID_STR_LENGTH - 1);
5280 return name;
5281}
5282
5283static int sysfs_slab_add(struct kmem_cache *s)
5284{
5285 int err;
5286 const char *name;
5287 int unmergeable;
5288
5289 if (slab_state < SYSFS)
5290
5291 return 0;
5292
5293 unmergeable = slab_unmergeable(s);
5294 if (unmergeable) {
5295
5296
5297
5298
5299
5300 sysfs_remove_link(&slab_kset->kobj, s->name);
5301 name = s->name;
5302 } else {
5303
5304
5305
5306
5307 name = create_unique_id(s);
5308 }
5309
5310 s->kobj.kset = slab_kset;
5311 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5312 if (err) {
5313 kobject_put(&s->kobj);
5314 return err;
5315 }
5316
5317 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5318 if (err) {
5319 kobject_del(&s->kobj);
5320 kobject_put(&s->kobj);
5321 return err;
5322 }
5323 kobject_uevent(&s->kobj, KOBJ_ADD);
5324 if (!unmergeable) {
5325
5326 sysfs_slab_alias(s, s->name);
5327 kfree(name);
5328 }
5329 return 0;
5330}
5331
5332static void sysfs_slab_remove(struct kmem_cache *s)
5333{
5334 if (slab_state < SYSFS)
5335
5336
5337
5338
5339 return;
5340
5341 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5342 kobject_del(&s->kobj);
5343 kobject_put(&s->kobj);
5344}
5345
5346
5347
5348
5349
/*
 * An alias requested through sysfs_slab_alias() before slab_state
 * reached SYSFS. The entries are replayed and freed by slab_sysfs_init().
 */
struct saved_alias {
	struct kmem_cache *s;		/* Cache the alias refers to */
	const char *name;		/* Name of the alias */
	struct saved_alias *next;	/* Next entry of alias_list */
};

/* Singly linked list of deferred aliases, newest entry first */
static struct saved_alias *alias_list;
5357
5358static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5359{
5360 struct saved_alias *al;
5361
5362 if (slab_state == SYSFS) {
5363
5364
5365
5366 sysfs_remove_link(&slab_kset->kobj, name);
5367 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5368 }
5369
5370 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5371 if (!al)
5372 return -ENOMEM;
5373
5374 al->s = s;
5375 al->name = name;
5376 al->next = alias_list;
5377 alias_list = al;
5378 return 0;
5379}
5380
5381static int __init slab_sysfs_init(void)
5382{
5383 struct kmem_cache *s;
5384 int err;
5385
5386 down_write(&slub_lock);
5387
5388 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5389 if (!slab_kset) {
5390 up_write(&slub_lock);
5391 printk(KERN_ERR "Cannot register slab subsystem.\n");
5392 return -ENOSYS;
5393 }
5394
5395 slab_state = SYSFS;
5396
5397 list_for_each_entry(s, &slab_caches, list) {
5398 err = sysfs_slab_add(s);
5399 if (err)
5400 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5401 " to sysfs\n", s->name);
5402 }
5403
5404 while (alias_list) {
5405 struct saved_alias *al = alias_list;
5406
5407 alias_list = alias_list->next;
5408 err = sysfs_slab_alias(al->s, al->name);
5409 if (err)
5410 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5411 " %s to sysfs\n", s->name);
5412 kfree(al);
5413 }
5414
5415 up_write(&slub_lock);
5416 resiliency_test();
5417 return 0;
5418}
5419
5420__initcall(slab_sysfs_init);
5421#endif
5422
5423
5424
5425
5426#ifdef CONFIG_SLABINFO
/* Emit the version line and the column description of /proc/slabinfo. */
static void print_slabinfo_header(struct seq_file *m)
{
	/* Format version */
	seq_puts(m, "slabinfo - version: 2.1\n");

	/* Column names, in three sections on a single line */
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
	seq_putc(m, '\n');
}
5436
5437static void *s_start(struct seq_file *m, loff_t *pos)
5438{
5439 loff_t n = *pos;
5440
5441 down_read(&slub_lock);
5442 if (!n)
5443 print_slabinfo_header(m);
5444
5445 return seq_list_start(&slab_caches, *pos);
5446}
5447
5448static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5449{
5450 return seq_list_next(p, &slab_caches, pos);
5451}
5452
5453static void s_stop(struct seq_file *m, void *p)
5454{
5455 up_read(&slub_lock);
5456}
5457
5458static int s_show(struct seq_file *m, void *p)
5459{
5460 unsigned long nr_partials = 0;
5461 unsigned long nr_slabs = 0;
5462 unsigned long nr_inuse = 0;
5463 unsigned long nr_objs = 0;
5464 unsigned long nr_free = 0;
5465 struct kmem_cache *s;
5466 int node;
5467
5468 s = list_entry(p, struct kmem_cache, list);
5469
5470 for_each_online_node(node) {
5471 struct kmem_cache_node *n = get_node(s, node);
5472
5473 if (!n)
5474 continue;
5475
5476 nr_partials += n->nr_partial;
5477 nr_slabs += atomic_long_read(&n->nr_slabs);
5478 nr_objs += atomic_long_read(&n->total_objects);
5479 nr_free += count_partial(n, count_free);
5480 }
5481
5482 nr_inuse = nr_objs - nr_free;
5483
5484 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
5485 nr_objs, s->size, oo_objects(s->oo),
5486 (1 << oo_order(s->oo)));
5487 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
5488 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
5489 0UL);
5490 seq_putc(m, '\n');
5491 return 0;
5492}
5493
5494static const struct seq_operations slabinfo_op = {
5495 .start = s_start,
5496 .next = s_next,
5497 .stop = s_stop,
5498 .show = s_show,
5499};
5500
5501static int slabinfo_open(struct inode *inode, struct file *file)
5502{
5503 return seq_open(file, &slabinfo_op);
5504}
5505
5506static const struct file_operations proc_slabinfo_operations = {
5507 .open = slabinfo_open,
5508 .read = seq_read,
5509 .llseek = seq_lseek,
5510 .release = seq_release,
5511};
5512
5513static int __init slab_proc_init(void)
5514{
5515 proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
5516 return 0;
5517}
5518module_init(slab_proc_init);
5519#endif
5520