1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include "slab.h"
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/kmemcheck.h>
23#include <linux/cpu.h>
24#include <linux/cpuset.h>
25#include <linux/mempolicy.h>
26#include <linux/ctype.h>
27#include <linux/debugobjects.h>
28#include <linux/kallsyms.h>
29#include <linux/memory.h>
30#include <linux/math64.h>
31#include <linux/fault-inject.h>
32#include <linux/stacktrace.h>
33#include <linux/prefetch.h>
34
35#include <trace/events/kmem.h>
36
37#include "internal.h"
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Set of cache flags that kmem_cache_debug() treats as "debugging is
 * active for this cache".
 */
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DEBUG_FREE)
117
/*
 * Tell whether any of the SLAB_DEBUG_FLAGS is set on cache @s.
 *
 * Returns non-zero if so.  Without CONFIG_SLUB_DEBUG the answer is a
 * constant 0.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#endif
}
126
127
128
129
130
131
132
133
134
135
/*
 * NOTE(review): SLUB_RESILIENCY_TEST is not referenced in the visible part
 * of this file; presumably it enables a built-in self test - confirm.
 */
#undef SLUB_RESILIENCY_TEST

/*
 * When defined, the cmpxchg_double_slab() helpers printk a
 * "cmpxchg double redo" message every time an exchange fails.
 */
#undef SLUB_DEBUG_CMPXCHG

/*
 * NOTE(review): users of MIN_PARTIAL are outside the visible code;
 * presumably a lower bound on partial slabs kept per node - confirm.
 */
#define MIN_PARTIAL 5

/*
 * NOTE(review): users of MAX_PARTIAL are outside the visible code;
 * presumably an upper bound on partial slabs kept per node - confirm.
 */
#define MAX_PARTIAL 10

/*
 * Debug flags used as the initial value of slub_debug when
 * CONFIG_SLUB_DEBUG_ON is set, and installed by setup_slub_debug() before
 * it parses any option letters.
 */
#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * NOTE(review): not used in the visible code; by name, the debug flags
 * that add per-object metadata - confirm against the users.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * NOTE(review): not used in the visible code; by name, flags that prevent
 * a cache from being merged with another one - confirm.
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

/*
 * NOTE(review): not used in the visible code; by name, flags that must be
 * equal for two caches to be merged - confirm.
 */
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

/*
 * Encoding of struct kmem_cache_order_objects: oo_make() stores the page
 * order above bit OO_SHIFT and the object count in the low OO_SHIFT bits;
 * oo_order() and oo_objects() decode it again.
 */
#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
/* on_freelist() caps the expected object count of a slab at this value */
#define MAX_OBJS_PER_PAGE	32767

/* Flag bits tested on kmem_cache->flags by code in this file */
#define __OBJECT_POISON		0x80000000UL /* init_object()/check_object() poison the object body */
#define __CMPXCHG_DOUBLE	0x40000000UL /* the cmpxchg helpers use cmpxchg_double() */
181
/*
 * Initialised to sizeof(struct kmem_cache).
 * NOTE(review): later adjustments, if any, are outside the visible code.
 */
static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
/* Notifier block, only present on SMP; its setup is outside the visible code */
static struct notifier_block slab_notifier;
#endif
187
188
189
190
/*
 * Tracking of the last allocation and the last free of an object.
 * Two of these records are stored per object when SLAB_STORE_USER is set
 * (see get_track()); they are filled in by set_track().
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Address passed to set_track(); 0 means "no record" */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Saved stack trace, zero terminated */
#endif
	int cpu;		/* smp_processor_id() when recorded */
	int pid;		/* current->pid when recorded */
	unsigned long when;	/* jiffies when recorded */
};

/* Index of the record inside the per-object pair (see get_track()) */
enum track_item { TRACK_ALLOC, TRACK_FREE };
203
/*
 * sysfs hooks.  With CONFIG_SYSFS the real implementations live further
 * down in the file; without it the add/alias hooks succeed trivially and
 * the remove hook just frees the cache name and the cache structure.
 */
#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	/* No sysfs object to tear down: release the memory directly */
	kfree(s->name);
	kfree(s);
}

#endif
220
221static inline void stat(const struct kmem_cache *s, enum stat_item si)
222{
223#ifdef CONFIG_SLUB_STATS
224 __this_cpu_inc(s->cpu_slab->stat[si]);
225#endif
226}
227
228
229
230
231
232static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
233{
234 return s->node[node];
235}
236
237
238static inline int check_valid_pointer(struct kmem_cache *s,
239 struct page *page, const void *object)
240{
241 void *base;
242
243 if (!object)
244 return 1;
245
246 base = page_address(page);
247 if (object < base || object >= base + page->objects * s->size ||
248 (object - base) % s->size) {
249 return 0;
250 }
251
252 return 1;
253}
254
255static inline void *get_freepointer(struct kmem_cache *s, void *object)
256{
257 return *(void **)(object + s->offset);
258}
259
260static void prefetch_freepointer(const struct kmem_cache *s, void *object)
261{
262 prefetch(object + s->offset);
263}
264
/*
 * Like get_freepointer(), but with CONFIG_DEBUG_PAGEALLOC the free pointer
 * is fetched through probe_kernel_read() instead of a plain dereference.
 * The result of probe_kernel_read() is not checked.
 */
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	void *next;

	probe_kernel_read(&next, (void **)(object + s->offset), sizeof(next));
	return next;
#else
	return get_freepointer(s, object);
#endif
}
276
277static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
278{
279 *(void **)(object + s->offset) = fp;
280}
281
282
/*
 * Loop over all objects in a slab.
 *
 * @__p walks from @__addr in steps of @__s->size until @__objects objects
 * have been visited.  Note that @__addr and @__s are evaluated on every
 * iteration.
 */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)
286
287
288static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
289{
290 return (p - addr) / s->size;
291}
292
293static inline size_t slab_ksize(const struct kmem_cache *s)
294{
295#ifdef CONFIG_SLUB_DEBUG
296
297
298
299
300 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
301 return s->object_size;
302
303#endif
304
305
306
307
308
309 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
310 return s->inuse;
311
312
313
314 return s->size;
315}
316
317static inline int order_objects(int order, unsigned long size, int reserved)
318{
319 return ((PAGE_SIZE << order) - reserved) / size;
320}
321
322static inline struct kmem_cache_order_objects oo_make(int order,
323 unsigned long size, int reserved)
324{
325 struct kmem_cache_order_objects x = {
326 (order << OO_SHIFT) + order_objects(order, size, reserved)
327 };
328
329 return x;
330}
331
332static inline int oo_order(struct kmem_cache_order_objects x)
333{
334 return x.x >> OO_SHIFT;
335}
336
337static inline int oo_objects(struct kmem_cache_order_objects x)
338{
339 return x.x & OO_MASK;
340}
341
342
343
344
345static __always_inline void slab_lock(struct page *page)
346{
347 bit_spin_lock(PG_locked, &page->flags);
348}
349
350static __always_inline void slab_unlock(struct page *page)
351{
352 __bit_spin_unlock(PG_locked, &page->flags);
353}
354
355
356static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
357 void *freelist_old, unsigned long counters_old,
358 void *freelist_new, unsigned long counters_new,
359 const char *n)
360{
361 VM_BUG_ON(!irqs_disabled());
362#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
363 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
364 if (s->flags & __CMPXCHG_DOUBLE) {
365 if (cmpxchg_double(&page->freelist, &page->counters,
366 freelist_old, counters_old,
367 freelist_new, counters_new))
368 return 1;
369 } else
370#endif
371 {
372 slab_lock(page);
373 if (page->freelist == freelist_old && page->counters == counters_old) {
374 page->freelist = freelist_new;
375 page->counters = counters_new;
376 slab_unlock(page);
377 return 1;
378 }
379 slab_unlock(page);
380 }
381
382 cpu_relax();
383 stat(s, CMPXCHG_DOUBLE_FAIL);
384
385#ifdef SLUB_DEBUG_CMPXCHG
386 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
387#endif
388
389 return 0;
390}
391
392static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
393 void *freelist_old, unsigned long counters_old,
394 void *freelist_new, unsigned long counters_new,
395 const char *n)
396{
397#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
398 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
399 if (s->flags & __CMPXCHG_DOUBLE) {
400 if (cmpxchg_double(&page->freelist, &page->counters,
401 freelist_old, counters_old,
402 freelist_new, counters_new))
403 return 1;
404 } else
405#endif
406 {
407 unsigned long flags;
408
409 local_irq_save(flags);
410 slab_lock(page);
411 if (page->freelist == freelist_old && page->counters == counters_old) {
412 page->freelist = freelist_new;
413 page->counters = counters_new;
414 slab_unlock(page);
415 local_irq_restore(flags);
416 return 1;
417 }
418 slab_unlock(page);
419 local_irq_restore(flags);
420 }
421
422 cpu_relax();
423 stat(s, CMPXCHG_DOUBLE_FAIL);
424
425#ifdef SLUB_DEBUG_CMPXCHG
426 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
427#endif
428
429 return 0;
430}
431
432#ifdef CONFIG_SLUB_DEBUG
433
434
435
436
437
438
439static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
440{
441 void *p;
442 void *addr = page_address(page);
443
444 for (p = page->freelist; p; p = get_freepointer(s, p))
445 set_bit(slab_index(p, s, addr), map);
446}
447
448
449
450
/*
 * Debug flags OR-ed into the flags of new caches by kmem_cache_flags().
 * Starts out as DEBUG_DEFAULT_FLAGS when CONFIG_SLUB_DEBUG_ON is set and
 * can be changed by the "slub_debug" boot parameter.
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

/*
 * Cache name prefix taken from the boot parameter (text after the comma);
 * when set, kmem_cache_flags() applies slub_debug only to matching caches.
 */
static char *slub_debug_slabs;
/* Set by the 'o' boot option; its users are outside the visible code */
static int disable_higher_order_debug;
459
460
461
462
463static void print_section(char *text, u8 *addr, unsigned int length)
464{
465 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
466 length, 1);
467}
468
469static struct track *get_track(struct kmem_cache *s, void *object,
470 enum track_item alloc)
471{
472 struct track *p;
473
474 if (s->offset)
475 p = object + s->offset + sizeof(void *);
476 else
477 p = object + s->inuse;
478
479 return p + alloc;
480}
481
482static void set_track(struct kmem_cache *s, void *object,
483 enum track_item alloc, unsigned long addr)
484{
485 struct track *p = get_track(s, object, alloc);
486
487 if (addr) {
488#ifdef CONFIG_STACKTRACE
489 struct stack_trace trace;
490 int i;
491
492 trace.nr_entries = 0;
493 trace.max_entries = TRACK_ADDRS_COUNT;
494 trace.entries = p->addrs;
495 trace.skip = 3;
496 save_stack_trace(&trace);
497
498
499 if (trace.nr_entries != 0 &&
500 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
501 trace.nr_entries--;
502
503 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
504 p->addrs[i] = 0;
505#endif
506 p->addr = addr;
507 p->cpu = smp_processor_id();
508 p->pid = current->pid;
509 p->when = jiffies;
510 } else
511 memset(p, 0, sizeof(struct track));
512}
513
514static void init_tracking(struct kmem_cache *s, void *object)
515{
516 if (!(s->flags & SLAB_STORE_USER))
517 return;
518
519 set_track(s, object, TRACK_FREE, 0UL);
520 set_track(s, object, TRACK_ALLOC, 0UL);
521}
522
523static void print_track(const char *s, struct track *t)
524{
525 if (!t->addr)
526 return;
527
528 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
529 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
530#ifdef CONFIG_STACKTRACE
531 {
532 int i;
533 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
534 if (t->addrs[i])
535 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
536 else
537 break;
538 }
539#endif
540}
541
542static void print_tracking(struct kmem_cache *s, void *object)
543{
544 if (!(s->flags & SLAB_STORE_USER))
545 return;
546
547 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
548 print_track("Freed", get_track(s, object, TRACK_FREE));
549}
550
551static void print_page_info(struct page *page)
552{
553 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
554 page, page->objects, page->inuse, page->freelist, page->flags);
555
556}
557
558static void slab_bug(struct kmem_cache *s, char *fmt, ...)
559{
560 va_list args;
561 char buf[100];
562
563 va_start(args, fmt);
564 vsnprintf(buf, sizeof(buf), fmt, args);
565 va_end(args);
566 printk(KERN_ERR "========================================"
567 "=====================================\n");
568 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
569 printk(KERN_ERR "----------------------------------------"
570 "-------------------------------------\n\n");
571}
572
573static void slab_fix(struct kmem_cache *s, char *fmt, ...)
574{
575 va_list args;
576 char buf[100];
577
578 va_start(args, fmt);
579 vsnprintf(buf, sizeof(buf), fmt, args);
580 va_end(args);
581 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
582}
583
584static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
585{
586 unsigned int off;
587 u8 *addr = page_address(page);
588
589 print_tracking(s, p);
590
591 print_page_info(page);
592
593 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
594 p, p - addr, get_freepointer(s, p));
595
596 if (p > addr + 16)
597 print_section("Bytes b4 ", p - 16, 16);
598
599 print_section("Object ", p, min_t(unsigned long, s->object_size,
600 PAGE_SIZE));
601 if (s->flags & SLAB_RED_ZONE)
602 print_section("Redzone ", p + s->object_size,
603 s->inuse - s->object_size);
604
605 if (s->offset)
606 off = s->offset + sizeof(void *);
607 else
608 off = s->inuse;
609
610 if (s->flags & SLAB_STORE_USER)
611 off += 2 * sizeof(struct track);
612
613 if (off != s->size)
614
615 print_section("Padding ", p + off, s->size - off);
616
617 dump_stack();
618}
619
620static void object_err(struct kmem_cache *s, struct page *page,
621 u8 *object, char *reason)
622{
623 slab_bug(s, "%s", reason);
624 print_trailer(s, page, object);
625}
626
627static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
628{
629 va_list args;
630 char buf[100];
631
632 va_start(args, fmt);
633 vsnprintf(buf, sizeof(buf), fmt, args);
634 va_end(args);
635 slab_bug(s, "%s", buf);
636 print_page_info(page);
637 dump_stack();
638}
639
640static void init_object(struct kmem_cache *s, void *object, u8 val)
641{
642 u8 *p = object;
643
644 if (s->flags & __OBJECT_POISON) {
645 memset(p, POISON_FREE, s->object_size - 1);
646 p[s->object_size - 1] = POISON_END;
647 }
648
649 if (s->flags & SLAB_RED_ZONE)
650 memset(p + s->object_size, val, s->inuse - s->object_size);
651}
652
653static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
654 void *from, void *to)
655{
656 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
657 memset(from, data, to - from);
658}
659
660static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
661 u8 *object, char *what,
662 u8 *start, unsigned int value, unsigned int bytes)
663{
664 u8 *fault;
665 u8 *end;
666
667 fault = memchr_inv(start, value, bytes);
668 if (!fault)
669 return 1;
670
671 end = start + bytes;
672 while (end > fault && end[-1] == value)
673 end--;
674
675 slab_bug(s, "%s overwritten", what);
676 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
677 fault, end - 1, fault[0], value);
678 print_trailer(s, page, object);
679
680 restore_bytes(s, what, value, fault, end);
681 return 0;
682}
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
723{
724 unsigned long off = s->inuse;
725
726 if (s->offset)
727
728 off += sizeof(void *);
729
730 if (s->flags & SLAB_STORE_USER)
731
732 off += 2 * sizeof(struct track);
733
734 if (s->size == off)
735 return 1;
736
737 return check_bytes_and_report(s, page, p, "Object padding",
738 p + off, POISON_INUSE, s->size - off);
739}
740
741
742static int slab_pad_check(struct kmem_cache *s, struct page *page)
743{
744 u8 *start;
745 u8 *fault;
746 u8 *end;
747 int length;
748 int remainder;
749
750 if (!(s->flags & SLAB_POISON))
751 return 1;
752
753 start = page_address(page);
754 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
755 end = start + length;
756 remainder = length % s->size;
757 if (!remainder)
758 return 1;
759
760 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
761 if (!fault)
762 return 1;
763 while (end > fault && end[-1] == POISON_INUSE)
764 end--;
765
766 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
767 print_section("Padding ", end - remainder, remainder);
768
769 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
770 return 0;
771}
772
773static int check_object(struct kmem_cache *s, struct page *page,
774 void *object, u8 val)
775{
776 u8 *p = object;
777 u8 *endobject = object + s->object_size;
778
779 if (s->flags & SLAB_RED_ZONE) {
780 if (!check_bytes_and_report(s, page, object, "Redzone",
781 endobject, val, s->inuse - s->object_size))
782 return 0;
783 } else {
784 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
785 check_bytes_and_report(s, page, p, "Alignment padding",
786 endobject, POISON_INUSE, s->inuse - s->object_size);
787 }
788 }
789
790 if (s->flags & SLAB_POISON) {
791 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
792 (!check_bytes_and_report(s, page, p, "Poison", p,
793 POISON_FREE, s->object_size - 1) ||
794 !check_bytes_and_report(s, page, p, "Poison",
795 p + s->object_size - 1, POISON_END, 1)))
796 return 0;
797
798
799
800 check_pad_bytes(s, page, p);
801 }
802
803 if (!s->offset && val == SLUB_RED_ACTIVE)
804
805
806
807
808 return 1;
809
810
811 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
812 object_err(s, page, p, "Freepointer corrupt");
813
814
815
816
817
818 set_freepointer(s, p, NULL);
819 return 0;
820 }
821 return 1;
822}
823
824static int check_slab(struct kmem_cache *s, struct page *page)
825{
826 int maxobj;
827
828 VM_BUG_ON(!irqs_disabled());
829
830 if (!PageSlab(page)) {
831 slab_err(s, page, "Not a valid slab page");
832 return 0;
833 }
834
835 maxobj = order_objects(compound_order(page), s->size, s->reserved);
836 if (page->objects > maxobj) {
837 slab_err(s, page, "objects %u > max %u",
838 s->name, page->objects, maxobj);
839 return 0;
840 }
841 if (page->inuse > page->objects) {
842 slab_err(s, page, "inuse %u > max %u",
843 s->name, page->inuse, page->objects);
844 return 0;
845 }
846
847 slab_pad_check(s, page);
848 return 1;
849}
850
851
852
853
854
855static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
856{
857 int nr = 0;
858 void *fp;
859 void *object = NULL;
860 unsigned long max_objects;
861
862 fp = page->freelist;
863 while (fp && nr <= page->objects) {
864 if (fp == search)
865 return 1;
866 if (!check_valid_pointer(s, page, fp)) {
867 if (object) {
868 object_err(s, page, object,
869 "Freechain corrupt");
870 set_freepointer(s, object, NULL);
871 break;
872 } else {
873 slab_err(s, page, "Freepointer corrupt");
874 page->freelist = NULL;
875 page->inuse = page->objects;
876 slab_fix(s, "Freelist cleared");
877 return 0;
878 }
879 break;
880 }
881 object = fp;
882 fp = get_freepointer(s, object);
883 nr++;
884 }
885
886 max_objects = order_objects(compound_order(page), s->size, s->reserved);
887 if (max_objects > MAX_OBJS_PER_PAGE)
888 max_objects = MAX_OBJS_PER_PAGE;
889
890 if (page->objects != max_objects) {
891 slab_err(s, page, "Wrong number of objects. Found %d but "
892 "should be %d", page->objects, max_objects);
893 page->objects = max_objects;
894 slab_fix(s, "Number of objects adjusted.");
895 }
896 if (page->inuse != page->objects - nr) {
897 slab_err(s, page, "Wrong object count. Counter is %d but "
898 "counted were %d", page->inuse, page->objects - nr);
899 page->inuse = page->objects - nr;
900 slab_fix(s, "Object count adjusted.");
901 }
902 return search == NULL;
903}
904
905static void trace(struct kmem_cache *s, struct page *page, void *object,
906 int alloc)
907{
908 if (s->flags & SLAB_TRACE) {
909 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
910 s->name,
911 alloc ? "alloc" : "free",
912 object, page->inuse,
913 page->freelist);
914
915 if (!alloc)
916 print_section("Object ", (void *)object, s->object_size);
917
918 dump_stack();
919 }
920}
921
922
923
924
925
926static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
927{
928 flags &= gfp_allowed_mask;
929 lockdep_trace_alloc(flags);
930 might_sleep_if(flags & __GFP_WAIT);
931
932 return should_failslab(s->object_size, flags, s->flags);
933}
934
935static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
936{
937 flags &= gfp_allowed_mask;
938 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
939 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
940}
941
942static inline void slab_free_hook(struct kmem_cache *s, void *x)
943{
944 kmemleak_free_recursive(x, s->flags);
945
946
947
948
949
950
951#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
952 {
953 unsigned long flags;
954
955 local_irq_save(flags);
956 kmemcheck_slab_free(s, x, s->object_size);
957 debug_check_no_locks_freed(x, s->object_size);
958 local_irq_restore(flags);
959 }
960#endif
961 if (!(s->flags & SLAB_DEBUG_OBJECTS))
962 debug_check_no_obj_freed(x, s->object_size);
963}
964
965
966
967
968
969
970static void add_full(struct kmem_cache *s,
971 struct kmem_cache_node *n, struct page *page)
972{
973 if (!(s->flags & SLAB_STORE_USER))
974 return;
975
976 list_add(&page->lru, &n->full);
977}
978
979
980
981
982static void remove_full(struct kmem_cache *s, struct page *page)
983{
984 if (!(s->flags & SLAB_STORE_USER))
985 return;
986
987 list_del(&page->lru);
988}
989
990
991static inline unsigned long slabs_node(struct kmem_cache *s, int node)
992{
993 struct kmem_cache_node *n = get_node(s, node);
994
995 return atomic_long_read(&n->nr_slabs);
996}
997
998static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
999{
1000 return atomic_long_read(&n->nr_slabs);
1001}
1002
1003static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1004{
1005 struct kmem_cache_node *n = get_node(s, node);
1006
1007
1008
1009
1010
1011
1012
1013 if (n) {
1014 atomic_long_inc(&n->nr_slabs);
1015 atomic_long_add(objects, &n->total_objects);
1016 }
1017}
1018static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1019{
1020 struct kmem_cache_node *n = get_node(s, node);
1021
1022 atomic_long_dec(&n->nr_slabs);
1023 atomic_long_sub(objects, &n->total_objects);
1024}
1025
1026
1027static void setup_object_debug(struct kmem_cache *s, struct page *page,
1028 void *object)
1029{
1030 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1031 return;
1032
1033 init_object(s, object, SLUB_RED_INACTIVE);
1034 init_tracking(s, object);
1035}
1036
1037static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1038 void *object, unsigned long addr)
1039{
1040 if (!check_slab(s, page))
1041 goto bad;
1042
1043 if (!check_valid_pointer(s, page, object)) {
1044 object_err(s, page, object, "Freelist Pointer check fails");
1045 goto bad;
1046 }
1047
1048 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1049 goto bad;
1050
1051
1052 if (s->flags & SLAB_STORE_USER)
1053 set_track(s, object, TRACK_ALLOC, addr);
1054 trace(s, page, object, 1);
1055 init_object(s, object, SLUB_RED_ACTIVE);
1056 return 1;
1057
1058bad:
1059 if (PageSlab(page)) {
1060
1061
1062
1063
1064
1065 slab_fix(s, "Marking all objects used");
1066 page->inuse = page->objects;
1067 page->freelist = NULL;
1068 }
1069 return 0;
1070}
1071
1072static noinline int free_debug_processing(struct kmem_cache *s,
1073 struct page *page, void *object, unsigned long addr)
1074{
1075 unsigned long flags;
1076 int rc = 0;
1077
1078 local_irq_save(flags);
1079 slab_lock(page);
1080
1081 if (!check_slab(s, page))
1082 goto fail;
1083
1084 if (!check_valid_pointer(s, page, object)) {
1085 slab_err(s, page, "Invalid object pointer 0x%p", object);
1086 goto fail;
1087 }
1088
1089 if (on_freelist(s, page, object)) {
1090 object_err(s, page, object, "Object already free");
1091 goto fail;
1092 }
1093
1094 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1095 goto out;
1096
1097 if (unlikely(s != page->slab)) {
1098 if (!PageSlab(page)) {
1099 slab_err(s, page, "Attempt to free object(0x%p) "
1100 "outside of slab", object);
1101 } else if (!page->slab) {
1102 printk(KERN_ERR
1103 "SLUB <none>: no slab for object 0x%p.\n",
1104 object);
1105 dump_stack();
1106 } else
1107 object_err(s, page, object,
1108 "page slab pointer corrupt.");
1109 goto fail;
1110 }
1111
1112 if (s->flags & SLAB_STORE_USER)
1113 set_track(s, object, TRACK_FREE, addr);
1114 trace(s, page, object, 0);
1115 init_object(s, object, SLUB_RED_INACTIVE);
1116 rc = 1;
1117out:
1118 slab_unlock(page);
1119 local_irq_restore(flags);
1120 return rc;
1121
1122fail:
1123 slab_fix(s, "Object at 0x%p not freed", object);
1124 goto out;
1125}
1126
1127static int __init setup_slub_debug(char *str)
1128{
1129 slub_debug = DEBUG_DEFAULT_FLAGS;
1130 if (*str++ != '=' || !*str)
1131
1132
1133
1134 goto out;
1135
1136 if (*str == ',')
1137
1138
1139
1140
1141 goto check_slabs;
1142
1143 if (tolower(*str) == 'o') {
1144
1145
1146
1147
1148 disable_higher_order_debug = 1;
1149 goto out;
1150 }
1151
1152 slub_debug = 0;
1153 if (*str == '-')
1154
1155
1156
1157 goto out;
1158
1159
1160
1161
1162 for (; *str && *str != ','; str++) {
1163 switch (tolower(*str)) {
1164 case 'f':
1165 slub_debug |= SLAB_DEBUG_FREE;
1166 break;
1167 case 'z':
1168 slub_debug |= SLAB_RED_ZONE;
1169 break;
1170 case 'p':
1171 slub_debug |= SLAB_POISON;
1172 break;
1173 case 'u':
1174 slub_debug |= SLAB_STORE_USER;
1175 break;
1176 case 't':
1177 slub_debug |= SLAB_TRACE;
1178 break;
1179 case 'a':
1180 slub_debug |= SLAB_FAILSLAB;
1181 break;
1182 default:
1183 printk(KERN_ERR "slub_debug option '%c' "
1184 "unknown. skipped\n", *str);
1185 }
1186 }
1187
1188check_slabs:
1189 if (*str == ',')
1190 slub_debug_slabs = str + 1;
1191out:
1192 return 1;
1193}
1194
1195__setup("slub_debug", setup_slub_debug);
1196
1197static unsigned long kmem_cache_flags(unsigned long object_size,
1198 unsigned long flags, const char *name,
1199 void (*ctor)(void *))
1200{
1201
1202
1203
1204 if (slub_debug && (!slub_debug_slabs ||
1205 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1206 flags |= slub_debug;
1207
1208 return flags;
1209}
1210#else
/*
 * Stand-ins used when CONFIG_SLUB_DEBUG is not set.  They keep the
 * callers free of #ifdefs: checks report success, hooks do nothing and
 * the counters read as zero.
 */
static inline void setup_object_debug(struct kmem_cache *s,
			struct page *page, void *object) {}

/* Both debug-processing stubs return 0 */
static inline int alloc_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr) { return 0; }

static inline int free_debug_processing(struct kmem_cache *s,
	struct page *page, void *object, unsigned long addr) { return 0; }

/* Consistency checks always pass */
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
			{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
			void *object, u8 val) { return 1; }
/* No full-list tracking */
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
					struct page *page) {}
static inline void remove_full(struct kmem_cache *s, struct page *page) {}
/* Cache flags are passed through unchanged */
static inline unsigned long kmem_cache_flags(unsigned long object_size,
	unsigned long flags, const char *name,
	void (*ctor)(void *))
{
	return flags;
}
/* The debug settings become compile-time constants */
#define slub_debug 0

#define disable_higher_order_debug 0

/* No per-node slab accounting */
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
							{ return 0; }
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
							{ return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects) {}

/* Returning 0 means "do not fail this allocation" */
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
							{ return 0; }

static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
		void *object) {}

static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1253
1254#endif
1255
1256
1257
1258
1259static inline struct page *alloc_slab_page(gfp_t flags, int node,
1260 struct kmem_cache_order_objects oo)
1261{
1262 int order = oo_order(oo);
1263
1264 flags |= __GFP_NOTRACK;
1265
1266 if (node == NUMA_NO_NODE)
1267 return alloc_pages(flags, order);
1268 else
1269 return alloc_pages_exact_node(node, flags, order);
1270}
1271
1272static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1273{
1274 struct page *page;
1275 struct kmem_cache_order_objects oo = s->oo;
1276 gfp_t alloc_gfp;
1277
1278 flags &= gfp_allowed_mask;
1279
1280 if (flags & __GFP_WAIT)
1281 local_irq_enable();
1282
1283 flags |= s->allocflags;
1284
1285
1286
1287
1288
1289 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1290
1291 page = alloc_slab_page(alloc_gfp, node, oo);
1292 if (unlikely(!page)) {
1293 oo = s->min;
1294
1295
1296
1297
1298 page = alloc_slab_page(flags, node, oo);
1299
1300 if (page)
1301 stat(s, ORDER_FALLBACK);
1302 }
1303
1304 if (kmemcheck_enabled && page
1305 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1306 int pages = 1 << oo_order(oo);
1307
1308 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1309
1310
1311
1312
1313
1314 if (s->ctor)
1315 kmemcheck_mark_uninitialized_pages(page, pages);
1316 else
1317 kmemcheck_mark_unallocated_pages(page, pages);
1318 }
1319
1320 if (flags & __GFP_WAIT)
1321 local_irq_disable();
1322 if (!page)
1323 return NULL;
1324
1325 page->objects = oo_objects(oo);
1326 mod_zone_page_state(page_zone(page),
1327 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1328 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1329 1 << oo_order(oo));
1330
1331 return page;
1332}
1333
1334static void setup_object(struct kmem_cache *s, struct page *page,
1335 void *object)
1336{
1337 setup_object_debug(s, page, object);
1338 if (unlikely(s->ctor))
1339 s->ctor(object);
1340}
1341
1342static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1343{
1344 struct page *page;
1345 void *start;
1346 void *last;
1347 void *p;
1348
1349 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1350
1351 page = allocate_slab(s,
1352 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1353 if (!page)
1354 goto out;
1355
1356 inc_slabs_node(s, page_to_nid(page), page->objects);
1357 page->slab = s;
1358 __SetPageSlab(page);
1359 if (page->pfmemalloc)
1360 SetPageSlabPfmemalloc(page);
1361
1362 start = page_address(page);
1363
1364 if (unlikely(s->flags & SLAB_POISON))
1365 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1366
1367 last = start;
1368 for_each_object(p, s, start, page->objects) {
1369 setup_object(s, page, last);
1370 set_freepointer(s, last, p);
1371 last = p;
1372 }
1373 setup_object(s, page, last);
1374 set_freepointer(s, last, NULL);
1375
1376 page->freelist = start;
1377 page->inuse = page->objects;
1378 page->frozen = 1;
1379out:
1380 return page;
1381}
1382
1383static void __free_slab(struct kmem_cache *s, struct page *page)
1384{
1385 int order = compound_order(page);
1386 int pages = 1 << order;
1387
1388 if (kmem_cache_debug(s)) {
1389 void *p;
1390
1391 slab_pad_check(s, page);
1392 for_each_object(p, s, page_address(page),
1393 page->objects)
1394 check_object(s, page, p, SLUB_RED_INACTIVE);
1395 }
1396
1397 kmemcheck_free_shadow(page, compound_order(page));
1398
1399 mod_zone_page_state(page_zone(page),
1400 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1401 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1402 -pages);
1403
1404 __ClearPageSlabPfmemalloc(page);
1405 __ClearPageSlab(page);
1406 reset_page_mapcount(page);
1407 if (current->reclaim_state)
1408 current->reclaim_state->reclaimed_slab += pages;
1409 __free_pages(page, order);
1410}
1411
/*
 * True when page->lru is too small to hold a struct rcu_head.  In that
 * case free_slab() uses space at the end of the slab for the rcu_head
 * (and expects s->reserved to equal its size) instead of reusing
 * page->lru.
 */
#define need_reserve_slab_rcu						\
	(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1414
1415static void rcu_free_slab(struct rcu_head *h)
1416{
1417 struct page *page;
1418
1419 if (need_reserve_slab_rcu)
1420 page = virt_to_head_page(h);
1421 else
1422 page = container_of((struct list_head *)h, struct page, lru);
1423
1424 __free_slab(page->slab, page);
1425}
1426
1427static void free_slab(struct kmem_cache *s, struct page *page)
1428{
1429 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1430 struct rcu_head *head;
1431
1432 if (need_reserve_slab_rcu) {
1433 int order = compound_order(page);
1434 int offset = (PAGE_SIZE << order) - s->reserved;
1435
1436 VM_BUG_ON(s->reserved != sizeof(*head));
1437 head = page_address(page) + offset;
1438 } else {
1439
1440
1441
1442 head = (void *)&page->lru;
1443 }
1444
1445 call_rcu(head, rcu_free_slab);
1446 } else
1447 __free_slab(s, page);
1448}
1449
1450static void discard_slab(struct kmem_cache *s, struct page *page)
1451{
1452 dec_slabs_node(s, page_to_nid(page), page->objects);
1453 free_slab(s, page);
1454}
1455
1456
1457
1458
1459
1460
1461static inline void add_partial(struct kmem_cache_node *n,
1462 struct page *page, int tail)
1463{
1464 n->nr_partial++;
1465 if (tail == DEACTIVATE_TO_TAIL)
1466 list_add_tail(&page->lru, &n->partial);
1467 else
1468 list_add(&page->lru, &n->partial);
1469}
1470
1471
1472
1473
1474static inline void remove_partial(struct kmem_cache_node *n,
1475 struct page *page)
1476{
1477 list_del(&page->lru);
1478 n->nr_partial--;
1479}
1480
1481
1482
1483
1484
1485
1486
1487
1488
/*
 * Try to freeze a slab that is on the node partial list and remove it
 * from that list.
 *
 * @mode: non-zero - take the whole freelist: the page's freelist becomes
 *                   NULL and every object is accounted as in use.
 *        zero     - freeze the slab but leave its freelist on the page.
 *
 * Returns the freelist the slab had before the update, or NULL if the
 * cmpxchg lost a race with a concurrent update of the page (the slab is
 * then left on the list untouched).
 *
 * get_partial_node() calls this with n->list_lock held.
 */
static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
		int mode)
{
	void *freelist;
	unsigned long counters;
	struct page new;

	/*
	 * Snapshot freelist and counters; the cmpxchg below only succeeds
	 * if neither changed in the meantime.
	 */
	freelist = page->freelist;
	counters = page->counters;
	new.counters = counters;
	if (mode) {
		new.inuse = page->objects;
		new.freelist = NULL;
	} else {
		new.freelist = freelist;
	}

	/* A slab on the partial list must not already be frozen. */
	VM_BUG_ON(new.frozen);
	new.frozen = 1;

	if (!__cmpxchg_double_slab(s, page,
			freelist, counters,
			new.freelist, new.counters,
			"acquire_slab"))
		return NULL;

	remove_partial(n, page);
	/* A slab on the partial list is expected to have free objects. */
	WARN_ON(!freelist);
	return freelist;
}
1525
/* Forward declarations: both are defined further down but used by get_partial_node(). */
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1528
1529
1530
1531
1532static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1533 struct kmem_cache_cpu *c, gfp_t flags)
1534{
1535 struct page *page, *page2;
1536 void *object = NULL;
1537
1538
1539
1540
1541
1542
1543
1544 if (!n || !n->nr_partial)
1545 return NULL;
1546
1547 spin_lock(&n->list_lock);
1548 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1549 void *t;
1550 int available;
1551
1552 if (!pfmemalloc_match(page, flags))
1553 continue;
1554
1555 t = acquire_slab(s, n, page, object == NULL);
1556 if (!t)
1557 break;
1558
1559 if (!object) {
1560 c->page = page;
1561 stat(s, ALLOC_FROM_PARTIAL);
1562 object = t;
1563 available = page->objects - page->inuse;
1564 } else {
1565 available = put_cpu_partial(s, page, 0);
1566 stat(s, CPU_PARTIAL_NODE);
1567 }
1568 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1569 break;
1570
1571 }
1572 spin_unlock(&n->list_lock);
1573 return object;
1574}
1575
1576
1577
1578
/*
 * Look for a partial slab on any node allowed by the zonelist of the
 * current slab_node().
 *
 * Without CONFIG_NUMA this always returns NULL.
 */
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
		struct kmem_cache_cpu *c)
{
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *object;
	unsigned int cpuset_mems_cookie;

	/*
	 * Throttle remote-node scavenging: a ratio of 0 disables it, and
	 * otherwise the search is skipped whenever the low bits of the
	 * cycle counter exceed remote_node_defrag_ratio. kmem_cache_open()
	 * sets the ratio to 1000, so by default most calls proceed.
	 */
	if (!s->remote_node_defrag_ratio ||
			get_cycles() % 1024 > s->remote_node_defrag_ratio)
		return NULL;

	do {
		/* Retry the walk if put_mems_allowed() reports failure for this cookie. */
		cpuset_mems_cookie = get_mems_allowed();
		zonelist = node_zonelist(slab_node(), flags);
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
			struct kmem_cache_node *n;

			n = get_node(s, zone_to_nid(zone));

			/*
			 * Only raid nodes that the cpuset permits and that
			 * hold more than min_partial partial slabs.
			 */
			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
					n->nr_partial > s->min_partial) {
				object = get_partial_node(s, n, c, flags);
				if (object) {
					/*
					 * Success: the return value of
					 * put_mems_allowed() is deliberately
					 * ignored here, no retry is needed.
					 */
					put_mems_allowed(cpuset_mems_cookie);
					return object;
				}
			}
		}
	} while (!put_mems_allowed(cpuset_mems_cookie));
#endif
	return NULL;
}
1641
1642
1643
1644
1645static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1646 struct kmem_cache_cpu *c)
1647{
1648 void *object;
1649 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1650
1651 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1652 if (object || node != NUMA_NO_NODE)
1653 return object;
1654
1655 return get_any_partial(s, flags, c);
1656}
1657
#ifdef CONFIG_PREEMPT
/*
 * With preemption the transaction id advances in steps of the number of
 * cpus rounded up to a power of two. Since init_tid() starts each cpu at
 * its own cpu number, tid % TID_STEP keeps identifying the owning cpu
 * (see tid_to_cpu()).
 */
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/*
 * Without preemption the cpu number is not encoded in the tid; it is a
 * plain counter.
 */
#define TID_STEP 1
#endif
1672
/* Advance a transaction id by one TID_STEP. */
static inline unsigned long next_tid(unsigned long tid)
{
	return tid + TID_STEP;
}
1677
/* Extract the cpu part of a transaction id (always 0 when TID_STEP is 1). */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	return tid % TID_STEP;
}
1682
/* Extract the event-counter part of a transaction id. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	return tid / TID_STEP;
}
1687
/* Initial transaction id for @cpu: the cpu number itself. */
static inline unsigned int init_tid(int cpu)
{
	return cpu;
}
1692
/*
 * Account a failed per-cpu cmpxchg_double in the fast paths.
 *
 * @n:   name of the calling path, used in the diagnostic message
 * @tid: transaction id the caller sampled before the failed cmpxchg
 *
 * The CMPXCHG_DOUBLE_CPU_FAIL statistic is always updated. The diagnostic
 * output is compiled in only when SLUB_DEBUG_CMPXCHG is defined (it is
 * #undef'd near the top of this file) and classifies the failure by
 * comparing @tid with the cpu's current tid.
 */
static inline void note_cmpxchg_failure(const char *n,
		const struct kmem_cache *s, unsigned long tid)
{
#ifdef SLUB_DEBUG_CMPXCHG
	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);

	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);

#ifdef CONFIG_PREEMPT
	/* Only with preemption does the tid encode a cpu number. */
	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
		printk("due to cpu change %d -> %d\n",
			tid_to_cpu(tid), tid_to_cpu(actual_tid));
	else
#endif
	if (tid_to_event(tid) != tid_to_event(actual_tid))
		printk("due to cpu running other code. Event %ld->%ld\n",
			tid_to_event(tid), tid_to_event(actual_tid));
	else
		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
			actual_tid, tid, next_tid(tid));
#endif
	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
}
1716
1717void init_kmem_cache_cpus(struct kmem_cache *s)
1718{
1719 int cpu;
1720
1721 for_each_possible_cpu(cpu)
1722 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1723}
1724
1725
1726
1727
/*
 * Stop using @page as a cpu slab: give the objects remaining on the
 * per-cpu @freelist back to the page, unfreeze the page and move it to
 * the list matching its new state (partial list, full list for debug
 * caches, or freed outright when it is empty and the node already holds
 * more than min_partial partial slabs).
 *
 * The spinlock is taken without disabling interrupts here; the callers
 * visible in this file run with interrupts already off.
 */
static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
{
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;
	enum slab_modes l = M_NONE, m = M_NONE;
	void *nextfree;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;

	/*
	 * A non-empty page freelist on a frozen cpu slab means objects were
	 * freed to it by someone other than the owning cpu; such slabs are
	 * queued at the tail of the partial list.
	 */
	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = DEACTIVATE_TO_TAIL;
	}

	/*
	 * Stage one: push every object of the per-cpu freelist except the
	 * last one back onto the page freelist, one cmpxchg per object.
	 * The page stays frozen during this stage, so no list manipulation
	 * is needed. The loop stops when the current object has no
	 * successor, leaving that final object in 'freelist'.
	 */
	while (freelist && (nextfree = get_freepointer(s, freelist))) {
		void *prior;
		unsigned long counters;

		do {
			prior = page->freelist;
			counters = page->counters;
			set_freepointer(s, freelist, prior);
			new.counters = counters;
			new.inuse--;
			VM_BUG_ON(!new.frozen);

		} while (!__cmpxchg_double_slab(s, page,
			prior, counters,
			freelist, new.counters,
			"drain percpu freelist"));

		freelist = nextfree;
	}

	/*
	 * Stage two: return the last object (if any) and clear the frozen
	 * bit in a single cmpxchg. Before attempting the cmpxchg the page
	 * is speculatively placed on the list its new state calls for; if
	 * the cmpxchg fails the state is recomputed and the list placement
	 * is corrected on the next pass ('l' remembers the previous guess,
	 * 'm' is the current one).
	 */
redo:

	old.freelist = page->freelist;
	old.counters = page->counters;
	VM_BUG_ON(!old.frozen);

	/* Determine target state of the slab */
	new.counters = old.counters;
	if (freelist) {
		new.inuse--;
		set_freepointer(s, freelist, old.freelist);
		new.freelist = freelist;
	} else
		new.freelist = old.freelist;

	new.frozen = 0;

	if (!new.inuse && n->nr_partial > s->min_partial)
		m = M_FREE;
	else if (new.freelist) {
		m = M_PARTIAL;
		if (!lock) {
			lock = 1;
			/*
			 * The list lock is taken before the cmpxchg so the
			 * slab is never visible as unfrozen while it is on
			 * no list. It is held across retries.
			 */
			spin_lock(&n->list_lock);
		}
	} else {
		m = M_FULL;
		if (kmem_cache_debug(s) && !lock) {
			lock = 1;
			/*
			 * Only debug caches take the lock for a full slab;
			 * see add_full()/remove_full() for what they track.
			 */
			spin_lock(&n->list_lock);
		}
	}

	if (l != m) {

		/* Undo the list placement chosen on the previous attempt. */
		if (l == M_PARTIAL)

			remove_partial(n, page);

		else if (l == M_FULL)

			remove_full(s, page);

		if (m == M_PARTIAL) {

			add_partial(n, page, tail);
			/* 'tail' doubles as the statistics item to bump. */
			stat(s, tail);

		} else if (m == M_FULL) {

			stat(s, DEACTIVATE_FULL);
			add_full(s, n, page);

		}
	}

	l = m;
	if (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"))
		goto redo;

	if (lock)
		spin_unlock(&n->list_lock);

	/* An empty slab is released only after the list lock is dropped. */
	if (m == M_FREE) {
		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1868
1869
1870
1871
1872
1873
1874static void unfreeze_partials(struct kmem_cache *s)
1875{
1876 struct kmem_cache_node *n = NULL, *n2 = NULL;
1877 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1878 struct page *page, *discard_page = NULL;
1879
1880 while ((page = c->partial)) {
1881 struct page new;
1882 struct page old;
1883
1884 c->partial = page->next;
1885
1886 n2 = get_node(s, page_to_nid(page));
1887 if (n != n2) {
1888 if (n)
1889 spin_unlock(&n->list_lock);
1890
1891 n = n2;
1892 spin_lock(&n->list_lock);
1893 }
1894
1895 do {
1896
1897 old.freelist = page->freelist;
1898 old.counters = page->counters;
1899 VM_BUG_ON(!old.frozen);
1900
1901 new.counters = old.counters;
1902 new.freelist = old.freelist;
1903
1904 new.frozen = 0;
1905
1906 } while (!__cmpxchg_double_slab(s, page,
1907 old.freelist, old.counters,
1908 new.freelist, new.counters,
1909 "unfreezing slab"));
1910
1911 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1912 page->next = discard_page;
1913 discard_page = page;
1914 } else {
1915 add_partial(n, page, DEACTIVATE_TO_TAIL);
1916 stat(s, FREE_ADD_PARTIAL);
1917 }
1918 }
1919
1920 if (n)
1921 spin_unlock(&n->list_lock);
1922
1923 while (discard_page) {
1924 page = discard_page;
1925 discard_page = discard_page->next;
1926
1927 stat(s, DEACTIVATE_EMPTY);
1928 discard_slab(s, page);
1929 stat(s, FREE_SLAB);
1930 }
1931}
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1943{
1944 struct page *oldpage;
1945 int pages;
1946 int pobjects;
1947
1948 do {
1949 pages = 0;
1950 pobjects = 0;
1951 oldpage = this_cpu_read(s->cpu_slab->partial);
1952
1953 if (oldpage) {
1954 pobjects = oldpage->pobjects;
1955 pages = oldpage->pages;
1956 if (drain && pobjects > s->cpu_partial) {
1957 unsigned long flags;
1958
1959
1960
1961
1962 local_irq_save(flags);
1963 unfreeze_partials(s);
1964 local_irq_restore(flags);
1965 pobjects = 0;
1966 pages = 0;
1967 stat(s, CPU_PARTIAL_DRAIN);
1968 }
1969 }
1970
1971 pages++;
1972 pobjects += page->objects - page->inuse;
1973
1974 page->pages = pages;
1975 page->pobjects = pobjects;
1976 page->next = oldpage;
1977
1978 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
1979 return pobjects;
1980}
1981
1982static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1983{
1984 stat(s, CPUSLAB_FLUSH);
1985 deactivate_slab(s, c->page, c->freelist);
1986
1987 c->tid = next_tid(c->tid);
1988 c->page = NULL;
1989 c->freelist = NULL;
1990}
1991
1992
1993
1994
1995
1996
1997static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1998{
1999 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2000
2001 if (likely(c)) {
2002 if (c->page)
2003 flush_slab(s, c);
2004
2005 unfreeze_partials(s);
2006 }
2007}
2008
/*
 * Cross-call callback used by flush_all(): flush the executing cpu's
 * slabs for the cache passed in @d.
 */
static void flush_cpu_slab(void *d)
{
	__flush_cpu_slab((struct kmem_cache *)d, smp_processor_id());
}
2015
2016static bool has_cpu_slab(int cpu, void *info)
2017{
2018 struct kmem_cache *s = info;
2019 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2020
2021 return c->page || c->partial;
2022}
2023
/*
 * Flush the per-cpu slabs of @s on every cpu for which has_cpu_slab()
 * reports something to flush. The fourth argument (1) asks
 * on_each_cpu_cond() to wait for the callbacks to finish.
 */
static void flush_all(struct kmem_cache *s)
{
	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
}
2028
2029
2030
2031
2032
/*
 * Check whether @page satisfies a request for memory from @node.
 * NUMA_NO_NODE matches every page; without CONFIG_NUMA everything
 * matches.
 */
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
	if (node == NUMA_NO_NODE)
		return 1;

	return page_to_nid(page) == node;
#else
	return 1;
#endif
}
2041
/* Number of free objects in a slab; usable as a count_partial() callback. */
static int count_free(struct page *page)
{
	return page->objects - page->inuse;
}
2046
2047static unsigned long count_partial(struct kmem_cache_node *n,
2048 int (*get_count)(struct page *))
2049{
2050 unsigned long flags;
2051 unsigned long x = 0;
2052 struct page *page;
2053
2054 spin_lock_irqsave(&n->list_lock, flags);
2055 list_for_each_entry(page, &n->partial, lru)
2056 x += get_count(page);
2057 spin_unlock_irqrestore(&n->list_lock, flags);
2058 return x;
2059}
2060
/*
 * Total number of objects in all slabs of a node. The counter is only
 * maintained with CONFIG_SLUB_DEBUG; otherwise 0 is reported.
 */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
#ifdef CONFIG_SLUB_DEBUG
	return atomic_long_read(&n->total_objects);
#else
	return 0;
#endif
}
2069
/*
 * Print a diagnostic after a failed slab allocation: the cache geometry
 * followed by per-node slab, object and free-object counts.
 *
 * @nid: node the failed request was for (printed as-is)
 */
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
	int node;

	printk(KERN_WARNING
		"SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
		nid, gfpflags);
	printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
		"default order: %d, min order: %d\n", s->name, s->object_size,
		s->size, oo_order(s->oo), oo_order(s->min));

	/*
	 * The minimum order exceeds what the bare object needs, i.e. the
	 * per-object metadata is what pushed the slab to a higher order.
	 */
	if (oo_order(s->min) > get_order(s->object_size))
		printk(KERN_WARNING " %s debugging increased min order, use "
		       "slub_debug=O to disable.\n", s->name);

	for_each_online_node(node) {
		struct kmem_cache_node *n = get_node(s, node);
		unsigned long nr_slabs;
		unsigned long nr_objs;
		unsigned long nr_free;

		if (!n)
			continue;

		/* Free objects are counted on the partial list only. */
		nr_free  = count_partial(n, count_free);
		nr_slabs = node_nr_slabs(n);
		nr_objs  = node_nr_objs(n);

		printk(KERN_WARNING
			" node %d: slabs: %ld, objs: %ld, free: %ld\n",
			node, nr_slabs, nr_objs, nr_free);
	}
}
2104
/*
 * Obtain a freelist for the current cpu, either from an existing partial
 * slab or from a newly allocated slab.
 *
 * @pc: in/out pointer to the caller's kmem_cache_cpu; it is updated when
 *      the per-cpu pointer had to be re-read after allocating a new slab.
 *
 * Returns the freelist to allocate from, or NULL if no memory could be
 * obtained. On success (*pc)->page is the slab the freelist belongs to.
 */
static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
			int node, struct kmem_cache_cpu **pc)
{
	void *freelist;
	struct kmem_cache_cpu *c = *pc;
	struct page *page;

	freelist = get_partial(s, flags, node, c);

	if (freelist)
		return freelist;

	page = new_slab(s, flags, node);
	if (page) {
		/*
		 * Re-read the per-cpu pointer and flush any cpu slab found
		 * there. NOTE(review): presumably new_slab() can sleep and
		 * resume on another cpu - confirm against new_slab().
		 */
		c = __this_cpu_ptr(s->cpu_slab);
		if (c->page)
			flush_slab(s, c);

		/*
		 * Hand the complete freelist of the new slab to the caller;
		 * the page itself keeps none.
		 */
		freelist = page->freelist;
		page->freelist = NULL;

		stat(s, ALLOC_SLAB);
		c->page = page;
		*pc = c;
	} else
		freelist = NULL;

	return freelist;
}
2138
2139static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2140{
2141 if (unlikely(PageSlabPfmemalloc(page)))
2142 return gfp_pfmemalloc_allowed(gfpflags);
2143
2144 return true;
2145}
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
/*
 * Take over the whole freelist of the (frozen) cpu slab @page.
 *
 * All objects are accounted as in use and the page freelist is set to
 * NULL. If the page had no free objects the frozen bit is cleared as
 * part of the same atomic update, so a NULL return means the slab is no
 * longer owned by this cpu.
 *
 * Returns the freelist the page had, possibly NULL.
 */
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
	struct page new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = page->freelist;
		counters = page->counters;

		new.counters = counters;
		VM_BUG_ON(!new.frozen);

		new.inuse = page->objects;
		/* Stay frozen only if there is something to allocate from. */
		new.frozen = freelist != NULL;

	} while (!__cmpxchg_double_slab(s, page,
		freelist, counters,
		NULL, new.counters,
		"get_freelist"));

	return freelist;
}
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
/*
 * Allocation slow path, entered from slab_alloc() when the lockless per-cpu
 * freelist is empty or the cpu slab does not match the requested node.
 *
 * Runs with interrupts disabled and tries, in order:
 *   1. the per-cpu freelist of the current cpu slab,
 *   2. the page freelist of the current cpu slab (get_freelist()),
 *   3. a slab from the per-cpu partial list,
 *   4. a partial slab from a node, or a newly allocated slab
 *      (new_slab_objects()).
 *
 * Returns the allocated object, or NULL when out of memory.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void *freelist;
	struct page *page;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPT
	/*
	 * The caller sampled 'c' with interrupts enabled; reload the
	 * per-cpu pointer now that interrupts are off so it refers to the
	 * cpu we are actually running on.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	page = c->page;
	if (!page)
		goto new_slab;
redo:

	/* The cpu slab sits on the wrong node for this request. */
	if (unlikely(!node_match(page, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/*
	 * The cpu slab is a pfmemalloc slab that this allocation context is
	 * not allowed to use; give it up and look for another slab.
	 */
	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/* The per-cpu freelist may hold objects by now; use them if so. */
	freelist = c->freelist;
	if (freelist)
		goto load_freelist;

	stat(s, ALLOC_SLOWPATH);

	freelist = get_freelist(s, page);

	if (!freelist) {
		/* get_freelist() unfroze the exhausted slab; just forget it. */
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
	}

	stat(s, ALLOC_REFILL);

load_freelist:
	/*
	 * 'freelist' is the object being handed out; the rest of the chain
	 * becomes the new per-cpu freelist. The page must be frozen for the
	 * per-cpu freelist to be valid.
	 */
	VM_BUG_ON(!c->page->frozen);
	c->freelist = get_freepointer(s, freelist);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return freelist;

new_slab:

	/* Promote the first slab of the per-cpu partial list to cpu slab. */
	if (c->partial) {
		page = c->page = c->partial;
		c->partial = page->next;
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	freelist = new_slab_objects(s, gfpflags, node, &c);

	if (unlikely(!freelist)) {
		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
			slab_out_of_memory(s, gfpflags, node);

		local_irq_restore(flags);
		return NULL;
	}

	page = c->page;
	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
		goto load_freelist;

	/* Only entered in the debug case */
	if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */

	/*
	 * Debug cache, or a pfmemalloc slab this caller may not keep: hand
	 * out just this one object and deactivate the slab right away, so
	 * no per-cpu freelist is kept for it.
	 */
	deactivate_slab(s, page, get_freepointer(s, freelist));
	c->page = NULL;
	c->freelist = NULL;
	local_irq_restore(flags);
	return freelist;
}
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
/*
 * Allocation fast path, inlined into every allocation entry point.
 *
 * If the per-cpu freelist has an object and the cpu slab matches @node,
 * the object is taken with a single per-cpu cmpxchg_double on
 * (freelist, tid) without disabling interrupts. Otherwise the request is
 * passed on to __slab_alloc().
 *
 * @addr: caller address, forwarded to the slow path for debug tracking
 *
 * Returns the object (zeroed when __GFP_ZERO is set) or NULL.
 */
static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	struct page *page;
	unsigned long tid;

	if (slab_pre_alloc_hook(s, gfpflags))
		return NULL;

redo:

	/*
	 * Sample the per-cpu structure without disabling interrupts or
	 * preemption. Anything that invalidates the sample changes the tid,
	 * which makes the cmpxchg below fail and sends us back here.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * The tid has to be read before freelist and page; barrier() keeps
	 * the compiler from reordering those loads ahead of it.
	 */
	tid = c->tid;
	barrier();

	object = c->freelist;
	page = c->page;
	if (unlikely(!object || !node_match(page, node)))
		object = __slab_alloc(s, gfpflags, node, addr, c);

	else {
		void *next_object = get_freepointer_safe(s, object);

		/*
		 * Swing freelist to the next object and advance the tid in
		 * one atomic step. It only succeeds if both still hold the
		 * values sampled above, i.e. nothing else on this cpu
		 * allocated or freed in between.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				next_object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		prefetch_freepointer(s, next_object);
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->object_size);

	slab_post_alloc_hook(s, gfpflags, object);

	return object;
}
2382
2383void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2384{
2385 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2386
2387 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2388
2389 return ret;
2390}
2391EXPORT_SYMBOL(kmem_cache_alloc);
2392
2393#ifdef CONFIG_TRACING
2394void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2395{
2396 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2397 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2398 return ret;
2399}
2400EXPORT_SYMBOL(kmem_cache_alloc_trace);
2401
2402void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2403{
2404 void *ret = kmalloc_order(size, flags, order);
2405 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2406 return ret;
2407}
2408EXPORT_SYMBOL(kmalloc_order_trace);
2409#endif
2410
2411#ifdef CONFIG_NUMA
2412void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2413{
2414 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2415
2416 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2417 s->object_size, s->size, gfpflags, node);
2418
2419 return ret;
2420}
2421EXPORT_SYMBOL(kmem_cache_alloc_node);
2422
2423#ifdef CONFIG_TRACING
2424void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2425 gfp_t gfpflags,
2426 int node, size_t size)
2427{
2428 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2429
2430 trace_kmalloc_node(_RET_IP_, ret,
2431 size, s->size, gfpflags, node);
2432 return ret;
2433}
2434EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2435#endif
2436#endif
2437
2438
2439
2440
2441
2442
2443
2444
2445
/*
 * Free slow path, entered from slab_free() when the object does not
 * belong to the current cpu slab.
 *
 * The object is pushed onto the page freelist with a cmpxchg. The node
 * list lock is taken only when the free may require moving the slab
 * between lists: the slab becomes empty, or it was full (no free objects
 * before this free) and is not frozen.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	int inuse;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
		return;

	do {
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		if ((!new.inuse || !prior) && !was_frozen && !n) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * The slab was full and is on no list:
				 * freeze it so it can be put on the per-cpu
				 * partial list without the list lock.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

	                        n = get_node(s, page_to_nid(page));
				/*
				 * Take the lock before the cmpxchg so
				 * nobody else can manipulate the lists
				 * between the update and our own list
				 * operation. Because of the !n test above
				 * it is acquired once and held across
				 * retries of this loop.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}
		inuse = new.inuse;

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * No list lock was needed. If this free froze the slab,
		 * queue it on the per-cpu partial list (draining that list
		 * first when it is over budget).
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The slab was already frozen, i.e. owned by some cpu; the
		 * cmpxchg was all that had to be done.
		 */
                if (was_frozen)
                        stat(s, FREE_FROZEN);
                return;
        }

	/*
	 * The list lock is held. was_frozen reflects the final, successful
	 * pass of the loop and may be set if the slab got frozen by someone
	 * else after the lock was taken.
	 */
	if (was_frozen)
		stat(s, FREE_FROZEN);
	else {
		if (unlikely(!inuse && n->nr_partial > s->min_partial))
                        goto slab_empty;

		/*
		 * The slab had no free objects before this free, so it was
		 * not on the partial list; add it now.
		 */
		if (unlikely(!prior)) {
			remove_full(s, page);
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/*
		 * The slab had free objects before, hence it sits on the
		 * partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* Slab must be on the full list */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
/*
 * Free fast path, inlined into every free entry point.
 *
 * If @x belongs to the current cpu slab it is pushed onto the per-cpu
 * freelist with one per-cpu cmpxchg_double on (freelist, tid), without
 * disabling interrupts. Any other object goes through __slab_free().
 *
 * @page: head page of the slab containing @x
 * @addr: caller address, forwarded to the slow path for debug tracking
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * Sample the per-cpu structure and its tid first; the cmpxchg below
	 * fails and we retry if the tid moved on in the meantime.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
	barrier();

	if (likely(page == c->page)) {
		/* Link the object in front of the current per-cpu freelist. */
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);

}
2608
2609void kmem_cache_free(struct kmem_cache *s, void *x)
2610{
2611 struct page *page;
2612
2613 page = virt_to_head_page(x);
2614
2615 slab_free(s, page, x, _RET_IP_);
2616
2617 trace_kmem_cache_free(_RET_IP_, x);
2618}
2619EXPORT_SYMBOL(kmem_cache_free);
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
/*
 * Slab order tunables consumed by slab_order() and calculate_order():
 *
 * slub_min_order   - lowest page order slab_order() will consider
 * slub_max_order   - highest order tried before calculate_order() falls
 *                    back to relaxed searches
 * slub_min_objects - desired minimum number of objects per slab; when 0,
 *                    calculate_order() derives a value from nr_cpu_ids
 */
static int slub_min_order;
static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static int slub_min_objects;

/*
 * NOTE(review): not referenced in this part of the file; by its name it
 * presumably disables merging of compatible caches - confirm at its users.
 */
static int slub_nomerge;
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675static inline int slab_order(int size, int min_objects,
2676 int max_order, int fract_leftover, int reserved)
2677{
2678 int order;
2679 int rem;
2680 int min_order = slub_min_order;
2681
2682 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2683 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2684
2685 for (order = max(min_order,
2686 fls(min_objects * size - 1) - PAGE_SHIFT);
2687 order <= max_order; order++) {
2688
2689 unsigned long slab_size = PAGE_SIZE << order;
2690
2691 if (slab_size < min_objects * size + reserved)
2692 continue;
2693
2694 rem = (slab_size - reserved) % size;
2695
2696 if (rem <= slab_size / fract_leftover)
2697 break;
2698
2699 }
2700
2701 return order;
2702}
2703
2704static inline int calculate_order(int size, int reserved)
2705{
2706 int order;
2707 int min_objects;
2708 int fraction;
2709 int max_objects;
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719 min_objects = slub_min_objects;
2720 if (!min_objects)
2721 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2722 max_objects = order_objects(slub_max_order, size, reserved);
2723 min_objects = min(min_objects, max_objects);
2724
2725 while (min_objects > 1) {
2726 fraction = 16;
2727 while (fraction >= 4) {
2728 order = slab_order(size, min_objects,
2729 slub_max_order, fraction, reserved);
2730 if (order <= slub_max_order)
2731 return order;
2732 fraction /= 2;
2733 }
2734 min_objects--;
2735 }
2736
2737
2738
2739
2740
2741 order = slab_order(size, 1, slub_max_order, 1, reserved);
2742 if (order <= slub_max_order)
2743 return order;
2744
2745
2746
2747
2748 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2749 if (order < MAX_ORDER)
2750 return order;
2751 return -ENOSYS;
2752}
2753
2754
2755
2756
2757static unsigned long calculate_alignment(unsigned long flags,
2758 unsigned long align, unsigned long size)
2759{
2760
2761
2762
2763
2764
2765
2766
2767 if (flags & SLAB_HWCACHE_ALIGN) {
2768 unsigned long ralign = cache_line_size();
2769 while (size <= ralign / 2)
2770 ralign /= 2;
2771 align = max(align, ralign);
2772 }
2773
2774 if (align < ARCH_SLAB_MINALIGN)
2775 align = ARCH_SLAB_MINALIGN;
2776
2777 return ALIGN(align, sizeof(void *));
2778}
2779
/*
 * Initialise a per-node structure: empty partial list, unlocked list
 * lock and, with CONFIG_SLUB_DEBUG, zeroed counters and an empty full
 * list.
 */
static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
	n->nr_partial = 0;
	spin_lock_init(&n->list_lock);
	INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
	atomic_long_set(&n->nr_slabs, 0);
	atomic_long_set(&n->total_objects, 0);
	INIT_LIST_HEAD(&n->full);
#endif
}
2792
2793static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2794{
2795 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2796 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2797
2798
2799
2800
2801
2802 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2803 2 * sizeof(void *));
2804
2805 if (!s->cpu_slab)
2806 return 0;
2807
2808 init_kmem_cache_cpus(s);
2809
2810 return 1;
2811}
2812
/* The cache from which the struct kmem_cache_node of every cache is allocated. */
static struct kmem_cache *kmem_cache_node;
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
/*
 * Set up the per-node structure of the kmem_cache_node cache itself for
 * @node. init_kmem_cache_nodes() uses this while slab_state is DOWN,
 * when the regular allocation path cannot be used yet.
 *
 * A slab is allocated directly, the first object on its freelist is
 * carved out by hand to serve as the kmem_cache_node, and the slab is
 * then put on that node's partial list. Allocation failure is fatal.
 */
static void early_kmem_cache_node_alloc(int node)
{
	struct page *page;
	struct kmem_cache_node *n;

	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));

	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);

	BUG_ON(!page);
	/* The slab came from another node; warn but carry on with it. */
	if (page_to_nid(page) != node) {
		printk(KERN_ERR "SLUB: Unable to allocate memory from "
				"node %d\n", node);
		printk(KERN_ERR "SLUB: Allocating a useless per node structure "
				"in order to be able to continue\n");
	}

	/* Take the first free object and account it as allocated. */
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse = 1;
	page->frozen = 0;
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
	init_tracking(kmem_cache_node, n);
#endif
	init_kmem_cache_node(n);
	inc_slabs_node(kmem_cache_node, node, page->objects);

	add_partial(n, page, DEACTIVATE_TO_HEAD);
}
2856
2857static void free_kmem_cache_nodes(struct kmem_cache *s)
2858{
2859 int node;
2860
2861 for_each_node_state(node, N_NORMAL_MEMORY) {
2862 struct kmem_cache_node *n = s->node[node];
2863
2864 if (n)
2865 kmem_cache_free(kmem_cache_node, n);
2866
2867 s->node[node] = NULL;
2868 }
2869}
2870
2871static int init_kmem_cache_nodes(struct kmem_cache *s)
2872{
2873 int node;
2874
2875 for_each_node_state(node, N_NORMAL_MEMORY) {
2876 struct kmem_cache_node *n;
2877
2878 if (slab_state == DOWN) {
2879 early_kmem_cache_node_alloc(node);
2880 continue;
2881 }
2882 n = kmem_cache_alloc_node(kmem_cache_node,
2883 GFP_KERNEL, node);
2884
2885 if (!n) {
2886 free_kmem_cache_nodes(s);
2887 return 0;
2888 }
2889
2890 s->node[node] = n;
2891 init_kmem_cache_node(n);
2892 }
2893 return 1;
2894}
2895
2896static void set_min_partial(struct kmem_cache *s, unsigned long min)
2897{
2898 if (min < MIN_PARTIAL)
2899 min = MIN_PARTIAL;
2900 else if (min > MAX_PARTIAL)
2901 min = MAX_PARTIAL;
2902 s->min_partial = min;
2903}
2904
2905
2906
2907
2908
/*
 * Compute the object layout and slab order of a cache from its flags,
 * object size, alignment and constructor.
 *
 * @forced_order: slab order to use, or a negative value to let
 *                calculate_order() choose one
 *
 * Fills in s->inuse, s->offset, s->align, s->size, s->allocflags, s->oo,
 * s->min and s->max. Returns non-zero if at least one object fits into a
 * slab of the chosen order, 0 otherwise.
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	unsigned long flags = s->flags;
	unsigned long size = s->object_size;
	unsigned long align = s->align;
	int order;

	/*
	 * Round the object size up to a whole number of words so that the
	 * free pointer, which may be stored right behind the object, ends
	 * up word aligned.
	 */
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * __OBJECT_POISON is set only when poisoning is requested and the
	 * cache is neither RCU-freed nor has a constructor.
	 */
	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;


	/*
	 * Red zoning needs space behind the object. If the word rounding
	 * above added none, append one extra word.
	 */
	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
		size += sizeof(void *);
#endif

	/*
	 * Everything up to here is the part of the object in use while it
	 * is allocated; metadata follows.
	 */
	s->inuse = size;

	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * For RCU-freed or poisoned caches and for caches with a
		 * constructor the free pointer is placed behind the object
		 * rather than inside it, which costs one more word per
		 * object.
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	if (flags & SLAB_STORE_USER)
		/*
		 * Room for two struct track records per object.
		 * NOTE(review): presumably one for the last alloc and one
		 * for the last free - confirm against the tracking code.
		 */
		size += 2 * sizeof(struct track);

	if (flags & SLAB_RED_ZONE)
		/*
		 * One more word of padding at the very end of the object
		 * when red zoning is on.
		 */
		size += sizeof(void *);
#endif

	/*
	 * The alignment is derived from the original object size, not from
	 * the size including metadata.
	 */
	align = calculate_alignment(flags, align, s->object_size);
	s->align = align;

	/*
	 * Round the total per-object size up to the alignment so every
	 * object in the slab starts on an aligned address.
	 */
	size = ALIGN(size, align);
	s->size = size;
	if (forced_order >= 0)
		order = forced_order;
	else
		order = calculate_order(size, s->reserved);

	if (order < 0)
		return 0;

	s->allocflags = 0;
	if (order)
		s->allocflags |= __GFP_COMP;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= SLUB_DMA;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * s->oo is the preferred order/objects pair, s->min the smallest
	 * slab that holds a single object, s->max the largest object count
	 * seen so far.
	 */
	s->oo = oo_make(order, size, s->reserved);
	s->min = oo_make(get_order(size), size, s->reserved);
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;

	return !!oo_objects(s->oo);

}
3028
/*
 * Initialise the kmem_cache structure @s for objects of @size bytes.
 *
 * @name:  cache name (the pointer is stored, not copied)
 * @align: requested minimum alignment
 * @flags: SLAB_* flags; filtered through kmem_cache_flags()
 * @ctor:  optional object constructor
 *
 * Returns 1 on success and 0 on failure. With SLAB_PANIC in @flags a
 * failure panics instead of returning.
 */
static int kmem_cache_open(struct kmem_cache *s,
		const char *name, size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *))
{
	memset(s, 0, kmem_size);
	s->name = name;
	s->ctor = ctor;
	s->object_size = size;
	s->align = align;
	s->flags = kmem_cache_flags(size, flags, name, ctor);
	s->reserved = 0;

	/* Set aside room for the rcu_head if page->lru cannot hold it. */
	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
		s->reserved = sizeof(struct rcu_head);

	if (!calculate_sizes(s, -1))
		goto error;
	if (disable_higher_order_debug) {
		/*
		 * If the debug metadata pushed the object into a higher
		 * page order, drop the metadata flags and lay the object
		 * out again.
		 */
		if (get_order(s->size) > get_order(s->object_size)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}

#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
		/* Enable fast mode */
		s->flags |= __CMPXCHG_DOUBLE;
#endif

	/*
	 * Scale the number of partial slabs kept per node with the object
	 * size; set_min_partial() clamps the value.
	 */
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * s->cpu_partial is the number of free objects that may be held on
	 * a per-cpu partial list before it is drained (see
	 * put_cpu_partial()). Debug caches keep none; otherwise the budget
	 * shrinks as objects get larger.
	 */
	if (kmem_cache_debug(s))
		s->cpu_partial = 0;
	else if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;

	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif
	if (!init_kmem_cache_nodes(s))
		goto error;

	if (alloc_kmem_cache_cpus(s))
		return 1;

	/* Per-cpu allocation failed: undo the per-node setup. */
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)size, s->size, oo_order(s->oo),
			s->offset, flags);
	return 0;
}
3120
3121
3122
3123
3124unsigned int kmem_cache_size(struct kmem_cache *s)
3125{
3126 return s->object_size;
3127}
3128EXPORT_SYMBOL(kmem_cache_size);
3129
/*
 * Report @text as a slab error for @page and then log every object of
 * the slab whose bit is clear in the map filled in by get_map().
 *
 * Compiled to an empty function without CONFIG_SLUB_DEBUG.  If the
 * temporary bitmap cannot be allocated nothing is reported.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	void *obj;
	unsigned long *bitmap;

	bitmap = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
			 GFP_ATOMIC);
	if (!bitmap)
		return;

	slab_err(s, page, "%s", text);
	slab_lock(page);

	get_map(s, page, bitmap);
	for_each_object(obj, s, base, page->objects) {
		if (test_bit(slab_index(obj, s, base), bitmap))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
							obj, obj - base);
		print_tracking(s, obj);
	}

	slab_unlock(page);
	kfree(bitmap);
#endif
}
3156
3157
3158
3159
3160
3161
3162static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3163{
3164 struct page *page, *h;
3165
3166 list_for_each_entry_safe(page, h, &n->partial, lru) {
3167 if (!page->inuse) {
3168 remove_partial(n, page);
3169 discard_slab(s, page);
3170 } else {
3171 list_slab_objects(s, page,
3172 "Objects remaining on kmem_cache_close()");
3173 }
3174 }
3175}
3176
3177
3178
3179
3180static inline int kmem_cache_close(struct kmem_cache *s)
3181{
3182 int node;
3183
3184 flush_all(s);
3185 free_percpu(s->cpu_slab);
3186
3187 for_each_node_state(node, N_NORMAL_MEMORY) {
3188 struct kmem_cache_node *n = get_node(s, node);
3189
3190 free_partial(s, n);
3191 if (n->nr_partial || slabs_node(s, node))
3192 return 1;
3193 }
3194 free_kmem_cache_nodes(s);
3195 return 0;
3196}
3197
3198
3199
3200
3201
3202void kmem_cache_destroy(struct kmem_cache *s)
3203{
3204 mutex_lock(&slab_mutex);
3205 s->refcount--;
3206 if (!s->refcount) {
3207 list_del(&s->list);
3208 mutex_unlock(&slab_mutex);
3209 if (kmem_cache_close(s)) {
3210 printk(KERN_ERR "SLUB %s: %s called for cache that "
3211 "still has objects.\n", s->name, __func__);
3212 dump_stack();
3213 }
3214 if (s->flags & SLAB_DESTROY_BY_RCU)
3215 rcu_barrier();
3216 sysfs_slab_remove(s);
3217 } else
3218 mutex_unlock(&slab_mutex);
3219}
3220EXPORT_SYMBOL(kmem_cache_destroy);
3221
3222
3223
3224
3225
/*
 * General purpose kmalloc caches.  get_slab() selects an entry by the
 * index it derives from the requested size.
 */
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);

/* Cache from which struct kmem_cache descriptors are allocated. */
static struct kmem_cache *kmem_cache;

#ifdef CONFIG_ZONE_DMA
/* DMA counterparts of kmalloc_caches, used for SLUB_DMA requests. */
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
3234
3235static int __init setup_slub_min_order(char *str)
3236{
3237 get_option(&str, &slub_min_order);
3238
3239 return 1;
3240}
3241
3242__setup("slub_min_order=", setup_slub_min_order);
3243
3244static int __init setup_slub_max_order(char *str)
3245{
3246 get_option(&str, &slub_max_order);
3247 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3248
3249 return 1;
3250}
3251
3252__setup("slub_max_order=", setup_slub_max_order);
3253
3254static int __init setup_slub_min_objects(char *str)
3255{
3256 get_option(&str, &slub_min_objects);
3257
3258 return 1;
3259}
3260
3261__setup("slub_min_objects=", setup_slub_min_objects);
3262
3263static int __init setup_slub_nomerge(char *str)
3264{
3265 slub_nomerge = 1;
3266 return 1;
3267}
3268
3269__setup("slub_nomerge", setup_slub_nomerge);
3270
3271static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3272 int size, unsigned int flags)
3273{
3274 struct kmem_cache *s;
3275
3276 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3277
3278
3279
3280
3281
3282 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3283 flags, NULL))
3284 goto panic;
3285
3286 list_add(&s->list, &slab_caches);
3287 return s;
3288
3289panic:
3290 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3291 return NULL;
3292}
3293
3294
3295
3296
3297
3298
3299
3300static s8 size_index[24] = {
3301 3,
3302 4,
3303 5,
3304 5,
3305 6,
3306 6,
3307 6,
3308 6,
3309 1,
3310 1,
3311 1,
3312 1,
3313 7,
3314 7,
3315 7,
3316 7,
3317 2,
3318 2,
3319 2,
3320 2,
3321 2,
3322 2,
3323 2,
3324 2
3325};
3326
/*
 * Convert a request size in bytes to its slot in size_index: sizes
 * 1..8 map to slot 0, 9..16 to slot 1 and so on.
 */
static inline int size_index_elem(size_t bytes)
{
	size_t slot = (bytes - 1) >> 3;

	return slot;
}
3331
3332static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3333{
3334 int index;
3335
3336 if (size <= 192) {
3337 if (!size)
3338 return ZERO_SIZE_PTR;
3339
3340 index = size_index[size_index_elem(size)];
3341 } else
3342 index = fls(size - 1);
3343
3344#ifdef CONFIG_ZONE_DMA
3345 if (unlikely((flags & SLUB_DMA)))
3346 return kmalloc_dma_caches[index];
3347
3348#endif
3349 return kmalloc_caches[index];
3350}
3351
3352void *__kmalloc(size_t size, gfp_t flags)
3353{
3354 struct kmem_cache *s;
3355 void *ret;
3356
3357 if (unlikely(size > SLUB_MAX_SIZE))
3358 return kmalloc_large(size, flags);
3359
3360 s = get_slab(size, flags);
3361
3362 if (unlikely(ZERO_OR_NULL_PTR(s)))
3363 return s;
3364
3365 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3366
3367 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3368
3369 return ret;
3370}
3371EXPORT_SYMBOL(__kmalloc);
3372
3373#ifdef CONFIG_NUMA
3374static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3375{
3376 struct page *page;
3377 void *ptr = NULL;
3378
3379 flags |= __GFP_COMP | __GFP_NOTRACK;
3380 page = alloc_pages_node(node, flags, get_order(size));
3381 if (page)
3382 ptr = page_address(page);
3383
3384 kmemleak_alloc(ptr, size, 1, flags);
3385 return ptr;
3386}
3387
3388void *__kmalloc_node(size_t size, gfp_t flags, int node)
3389{
3390 struct kmem_cache *s;
3391 void *ret;
3392
3393 if (unlikely(size > SLUB_MAX_SIZE)) {
3394 ret = kmalloc_large_node(size, flags, node);
3395
3396 trace_kmalloc_node(_RET_IP_, ret,
3397 size, PAGE_SIZE << get_order(size),
3398 flags, node);
3399
3400 return ret;
3401 }
3402
3403 s = get_slab(size, flags);
3404
3405 if (unlikely(ZERO_OR_NULL_PTR(s)))
3406 return s;
3407
3408 ret = slab_alloc(s, flags, node, _RET_IP_);
3409
3410 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3411
3412 return ret;
3413}
3414EXPORT_SYMBOL(__kmalloc_node);
3415#endif
3416
3417size_t ksize(const void *object)
3418{
3419 struct page *page;
3420
3421 if (unlikely(object == ZERO_SIZE_PTR))
3422 return 0;
3423
3424 page = virt_to_head_page(object);
3425
3426 if (unlikely(!PageSlab(page))) {
3427 WARN_ON(!PageCompound(page));
3428 return PAGE_SIZE << compound_order(page);
3429 }
3430
3431 return slab_ksize(page->slab);
3432}
3433EXPORT_SYMBOL(ksize);
3434
3435#ifdef CONFIG_SLUB_DEBUG
3436bool verify_mem_not_deleted(const void *x)
3437{
3438 struct page *page;
3439 void *object = (void *)x;
3440 unsigned long flags;
3441 bool rv;
3442
3443 if (unlikely(ZERO_OR_NULL_PTR(x)))
3444 return false;
3445
3446 local_irq_save(flags);
3447
3448 page = virt_to_head_page(x);
3449 if (unlikely(!PageSlab(page))) {
3450
3451 rv = true;
3452 goto out_unlock;
3453 }
3454
3455 slab_lock(page);
3456 if (on_freelist(page->slab, page, object)) {
3457 object_err(page->slab, page, object, "Object is on free-list");
3458 rv = false;
3459 } else {
3460 rv = true;
3461 }
3462 slab_unlock(page);
3463
3464out_unlock:
3465 local_irq_restore(flags);
3466 return rv;
3467}
3468EXPORT_SYMBOL(verify_mem_not_deleted);
3469#endif
3470
3471void kfree(const void *x)
3472{
3473 struct page *page;
3474 void *object = (void *)x;
3475
3476 trace_kfree(_RET_IP_, x);
3477
3478 if (unlikely(ZERO_OR_NULL_PTR(x)))
3479 return;
3480
3481 page = virt_to_head_page(x);
3482 if (unlikely(!PageSlab(page))) {
3483 BUG_ON(!PageCompound(page));
3484 kmemleak_free(x);
3485 put_page(page);
3486 return;
3487 }
3488 slab_free(page->slab, page, object, _RET_IP_);
3489}
3490EXPORT_SYMBOL(kfree);
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502int kmem_cache_shrink(struct kmem_cache *s)
3503{
3504 int node;
3505 int i;
3506 struct kmem_cache_node *n;
3507 struct page *page;
3508 struct page *t;
3509 int objects = oo_objects(s->max);
3510 struct list_head *slabs_by_inuse =
3511 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3512 unsigned long flags;
3513
3514 if (!slabs_by_inuse)
3515 return -ENOMEM;
3516
3517 flush_all(s);
3518 for_each_node_state(node, N_NORMAL_MEMORY) {
3519 n = get_node(s, node);
3520
3521 if (!n->nr_partial)
3522 continue;
3523
3524 for (i = 0; i < objects; i++)
3525 INIT_LIST_HEAD(slabs_by_inuse + i);
3526
3527 spin_lock_irqsave(&n->list_lock, flags);
3528
3529
3530
3531
3532
3533
3534
3535 list_for_each_entry_safe(page, t, &n->partial, lru) {
3536 list_move(&page->lru, slabs_by_inuse + page->inuse);
3537 if (!page->inuse)
3538 n->nr_partial--;
3539 }
3540
3541
3542
3543
3544
3545 for (i = objects - 1; i > 0; i--)
3546 list_splice(slabs_by_inuse + i, n->partial.prev);
3547
3548 spin_unlock_irqrestore(&n->list_lock, flags);
3549
3550
3551 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3552 discard_slab(s, page);
3553 }
3554
3555 kfree(slabs_by_inuse);
3556 return 0;
3557}
3558EXPORT_SYMBOL(kmem_cache_shrink);
3559
3560#if defined(CONFIG_MEMORY_HOTPLUG)
3561static int slab_mem_going_offline_callback(void *arg)
3562{
3563 struct kmem_cache *s;
3564
3565 mutex_lock(&slab_mutex);
3566 list_for_each_entry(s, &slab_caches, list)
3567 kmem_cache_shrink(s);
3568 mutex_unlock(&slab_mutex);
3569
3570 return 0;
3571}
3572
3573static void slab_mem_offline_callback(void *arg)
3574{
3575 struct kmem_cache_node *n;
3576 struct kmem_cache *s;
3577 struct memory_notify *marg = arg;
3578 int offline_node;
3579
3580 offline_node = marg->status_change_nid;
3581
3582
3583
3584
3585
3586 if (offline_node < 0)
3587 return;
3588
3589 mutex_lock(&slab_mutex);
3590 list_for_each_entry(s, &slab_caches, list) {
3591 n = get_node(s, offline_node);
3592 if (n) {
3593
3594
3595
3596
3597
3598
3599 BUG_ON(slabs_node(s, offline_node));
3600
3601 s->node[offline_node] = NULL;
3602 kmem_cache_free(kmem_cache_node, n);
3603 }
3604 }
3605 mutex_unlock(&slab_mutex);
3606}
3607
3608static int slab_mem_going_online_callback(void *arg)
3609{
3610 struct kmem_cache_node *n;
3611 struct kmem_cache *s;
3612 struct memory_notify *marg = arg;
3613 int nid = marg->status_change_nid;
3614 int ret = 0;
3615
3616
3617
3618
3619
3620 if (nid < 0)
3621 return 0;
3622
3623
3624
3625
3626
3627
3628 mutex_lock(&slab_mutex);
3629 list_for_each_entry(s, &slab_caches, list) {
3630
3631
3632
3633
3634
3635 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3636 if (!n) {
3637 ret = -ENOMEM;
3638 goto out;
3639 }
3640 init_kmem_cache_node(n);
3641 s->node[nid] = n;
3642 }
3643out:
3644 mutex_unlock(&slab_mutex);
3645 return ret;
3646}
3647
3648static int slab_memory_callback(struct notifier_block *self,
3649 unsigned long action, void *arg)
3650{
3651 int ret = 0;
3652
3653 switch (action) {
3654 case MEM_GOING_ONLINE:
3655 ret = slab_mem_going_online_callback(arg);
3656 break;
3657 case MEM_GOING_OFFLINE:
3658 ret = slab_mem_going_offline_callback(arg);
3659 break;
3660 case MEM_OFFLINE:
3661 case MEM_CANCEL_ONLINE:
3662 slab_mem_offline_callback(arg);
3663 break;
3664 case MEM_ONLINE:
3665 case MEM_CANCEL_OFFLINE:
3666 break;
3667 }
3668 if (ret)
3669 ret = notifier_from_errno(ret);
3670 else
3671 ret = NOTIFY_OK;
3672 return ret;
3673}
3674
3675#endif
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3687{
3688 int node;
3689
3690 list_add(&s->list, &slab_caches);
3691 s->refcount = -1;
3692
3693 for_each_node_state(node, N_NORMAL_MEMORY) {
3694 struct kmem_cache_node *n = get_node(s, node);
3695 struct page *p;
3696
3697 if (n) {
3698 list_for_each_entry(p, &n->partial, lru)
3699 p->slab = s;
3700
3701#ifdef CONFIG_SLUB_DEBUG
3702 list_for_each_entry(p, &n->full, lru)
3703 p->slab = s;
3704#endif
3705 }
3706 }
3707}
3708
/*
 * Boot time initialisation of the allocator.
 *
 * Bootstraps the kmem_cache_node and kmem_cache caches from temporary
 * page allocator memory, moves both descriptors into properly allocated
 * objects, then creates the kmalloc caches (and their DMA variants with
 * CONFIG_ZONE_DMA) and prints a summary line.
 */
void __init kmem_cache_init(void)
{
	int i;
	int caches = 0;
	struct kmem_cache *temp_kmem_cache;
	int order;
	struct kmem_cache *temp_kmem_cache_node;
	unsigned long kmalloc_size;

	if (debug_guardpage_minorder())
		slub_max_order = 0;

	/* A descriptor ends with one node pointer per possible node id. */
	kmem_size = offsetof(struct kmem_cache, node) +
			nr_node_ids * sizeof(struct kmem_cache_node *);

	/* Temporary storage for two cache line aligned descriptors. */
	kmalloc_size = ALIGN(kmem_size, cache_line_size());
	order = get_order(2 * kmalloc_size);
	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);

	/*
	 * The second descriptor in the temporary area describes the cache
	 * for struct kmem_cache_node and is opened first.
	 */
	kmem_cache_node = (void *)kmem_cache + kmalloc_size;

	kmem_cache_open(kmem_cache_node, "kmem_cache_node",
		sizeof(struct kmem_cache_node),
		0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);

	/* The kmem_cache_node cache is open from here on. */
	slab_state = PARTIAL;

	/*
	 * Open the cache of descriptors in the temporary area, then copy
	 * that descriptor into an object allocated from the cache itself.
	 */
	temp_kmem_cache = kmem_cache;
	kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
		0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
	memcpy(kmem_cache, temp_kmem_cache, kmem_size);

	/*
	 * Likewise move the kmem_cache_node descriptor into an object
	 * allocated from kmem_cache.
	 */
	temp_kmem_cache_node = kmem_cache_node;

	kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
	memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);

	kmem_cache_bootstrap_fixup(kmem_cache_node);

	caches++;
	kmem_cache_bootstrap_fixup(kmem_cache);
	caches++;

	/* Both descriptors have been copied; drop the temporary pages. */
	free_pages((unsigned long)temp_kmem_cache, order);

	/* KMALLOC_MIN_SIZE must be a power of two no larger than 256. */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	/* Sizes below KMALLOC_MIN_SIZE all use the smallest cache. */
	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);
		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE == 64) {
		/*
		 * No 96 byte cache is created in this configuration (see
		 * below), so sizes 72..96 are redirected to index 7.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;
	} else if (KMALLOC_MIN_SIZE == 128) {
		/*
		 * No 192 byte cache is created in this configuration (see
		 * below), so sizes 136..192 are redirected to index 8.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}

	/* The two caches whose size is not a power of two. */
	if (KMALLOC_MIN_SIZE <= 32) {
		kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
		caches++;
	}

	if (KMALLOC_MIN_SIZE <= 64) {
		kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
		caches++;
	}

	/* Power of two caches; the placeholder name is replaced below. */
	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
		kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
		caches++;
	}

	slab_state = UP;

	/* Replace the cache names with dynamically allocated strings. */
	if (KMALLOC_MIN_SIZE <= 32) {
		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
		BUG_ON(!kmalloc_caches[1]->name);
	}

	if (KMALLOC_MIN_SIZE <= 64) {
		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
		BUG_ON(!kmalloc_caches[2]->name);
	}

	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
		char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);

		BUG_ON(!s);
		kmalloc_caches[i]->name = s;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&slab_notifier);
#endif

#ifdef CONFIG_ZONE_DMA
	/* Create a DMA twin for every kmalloc cache that exists. */
	for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s && s->size) {
			char *name = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", s->object_size);

			BUG_ON(!name);
			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
				s->object_size, SLAB_CACHE_DMA);
		}
	}
#endif
	printk(KERN_INFO
		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
		" CPUs=%d, Nodes=%d\n",
		caches, cache_line_size(),
		slub_min_order, slub_max_order, slub_min_objects,
		nr_cpu_ids, nr_node_ids);
}
3870
/* Late initialisation hook; this allocator has nothing to do here. */
void __init kmem_cache_init_late(void)
{
}
3874
3875
3876
3877
3878static int slab_unmergeable(struct kmem_cache *s)
3879{
3880 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3881 return 1;
3882
3883 if (s->ctor)
3884 return 1;
3885
3886
3887
3888
3889 if (s->refcount < 0)
3890 return 1;
3891
3892 return 0;
3893}
3894
3895static struct kmem_cache *find_mergeable(size_t size,
3896 size_t align, unsigned long flags, const char *name,
3897 void (*ctor)(void *))
3898{
3899 struct kmem_cache *s;
3900
3901 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3902 return NULL;
3903
3904 if (ctor)
3905 return NULL;
3906
3907 size = ALIGN(size, sizeof(void *));
3908 align = calculate_alignment(flags, align, size);
3909 size = ALIGN(size, align);
3910 flags = kmem_cache_flags(size, flags, name, NULL);
3911
3912 list_for_each_entry(s, &slab_caches, list) {
3913 if (slab_unmergeable(s))
3914 continue;
3915
3916 if (size > s->size)
3917 continue;
3918
3919 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3920 continue;
3921
3922
3923
3924
3925 if ((s->size & ~(align - 1)) != s->size)
3926 continue;
3927
3928 if (s->size - size >= sizeof(void *))
3929 continue;
3930
3931 return s;
3932 }
3933 return NULL;
3934}
3935
3936struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
3937 size_t align, unsigned long flags, void (*ctor)(void *))
3938{
3939 struct kmem_cache *s;
3940 char *n;
3941
3942 s = find_mergeable(size, align, flags, name, ctor);
3943 if (s) {
3944 s->refcount++;
3945
3946
3947
3948
3949 s->object_size = max(s->object_size, (int)size);
3950 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3951
3952 if (sysfs_slab_alias(s, name)) {
3953 s->refcount--;
3954 return NULL;
3955 }
3956 return s;
3957 }
3958
3959 n = kstrdup(name, GFP_KERNEL);
3960 if (!n)
3961 return NULL;
3962
3963 s = kmalloc(kmem_size, GFP_KERNEL);
3964 if (s) {
3965 if (kmem_cache_open(s, n,
3966 size, align, flags, ctor)) {
3967 int r;
3968
3969 list_add(&s->list, &slab_caches);
3970 mutex_unlock(&slab_mutex);
3971 r = sysfs_slab_add(s);
3972 mutex_lock(&slab_mutex);
3973
3974 if (!r)
3975 return s;
3976
3977 list_del(&s->list);
3978 kmem_cache_close(s);
3979 }
3980 kfree(s);
3981 }
3982 kfree(n);
3983 return NULL;
3984}
3985
3986#ifdef CONFIG_SMP
3987
3988
3989
3990
3991static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3992 unsigned long action, void *hcpu)
3993{
3994 long cpu = (long)hcpu;
3995 struct kmem_cache *s;
3996 unsigned long flags;
3997
3998 switch (action) {
3999 case CPU_UP_CANCELED:
4000 case CPU_UP_CANCELED_FROZEN:
4001 case CPU_DEAD:
4002 case CPU_DEAD_FROZEN:
4003 mutex_lock(&slab_mutex);
4004 list_for_each_entry(s, &slab_caches, list) {
4005 local_irq_save(flags);
4006 __flush_cpu_slab(s, cpu);
4007 local_irq_restore(flags);
4008 }
4009 mutex_unlock(&slab_mutex);
4010 break;
4011 default:
4012 break;
4013 }
4014 return NOTIFY_OK;
4015}
4016
4017static struct notifier_block __cpuinitdata slab_notifier = {
4018 .notifier_call = slab_cpuup_callback
4019};
4020
4021#endif
4022
4023void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4024{
4025 struct kmem_cache *s;
4026 void *ret;
4027
4028 if (unlikely(size > SLUB_MAX_SIZE))
4029 return kmalloc_large(size, gfpflags);
4030
4031 s = get_slab(size, gfpflags);
4032
4033 if (unlikely(ZERO_OR_NULL_PTR(s)))
4034 return s;
4035
4036 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
4037
4038
4039 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4040
4041 return ret;
4042}
4043
4044#ifdef CONFIG_NUMA
4045void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4046 int node, unsigned long caller)
4047{
4048 struct kmem_cache *s;
4049 void *ret;
4050
4051 if (unlikely(size > SLUB_MAX_SIZE)) {
4052 ret = kmalloc_large_node(size, gfpflags, node);
4053
4054 trace_kmalloc_node(caller, ret,
4055 size, PAGE_SIZE << get_order(size),
4056 gfpflags, node);
4057
4058 return ret;
4059 }
4060
4061 s = get_slab(size, gfpflags);
4062
4063 if (unlikely(ZERO_OR_NULL_PTR(s)))
4064 return s;
4065
4066 ret = slab_alloc(s, gfpflags, node, caller);
4067
4068
4069 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4070
4071 return ret;
4072}
4073#endif
4074
4075#ifdef CONFIG_SYSFS
4076static int count_inuse(struct page *page)
4077{
4078 return page->inuse;
4079}
4080
4081static int count_total(struct page *page)
4082{
4083 return page->objects;
4084}
4085#endif
4086
4087#ifdef CONFIG_SLUB_DEBUG
4088static int validate_slab(struct kmem_cache *s, struct page *page,
4089 unsigned long *map)
4090{
4091 void *p;
4092 void *addr = page_address(page);
4093
4094 if (!check_slab(s, page) ||
4095 !on_freelist(s, page, NULL))
4096 return 0;
4097
4098
4099 bitmap_zero(map, page->objects);
4100
4101 get_map(s, page, map);
4102 for_each_object(p, s, addr, page->objects) {
4103 if (test_bit(slab_index(p, s, addr), map))
4104 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4105 return 0;
4106 }
4107
4108 for_each_object(p, s, addr, page->objects)
4109 if (!test_bit(slab_index(p, s, addr), map))
4110 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4111 return 0;
4112 return 1;
4113}
4114
/*
 * Run validate_slab() on @page with the slab locked.  The validation
 * result is not propagated.
 */
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	slab_lock(page);
	(void)validate_slab(s, page, map);
	slab_unlock(page);
}
4122
4123static int validate_slab_node(struct kmem_cache *s,
4124 struct kmem_cache_node *n, unsigned long *map)
4125{
4126 unsigned long count = 0;
4127 struct page *page;
4128 unsigned long flags;
4129
4130 spin_lock_irqsave(&n->list_lock, flags);
4131
4132 list_for_each_entry(page, &n->partial, lru) {
4133 validate_slab_slab(s, page, map);
4134 count++;
4135 }
4136 if (count != n->nr_partial)
4137 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4138 "counter=%ld\n", s->name, count, n->nr_partial);
4139
4140 if (!(s->flags & SLAB_STORE_USER))
4141 goto out;
4142
4143 list_for_each_entry(page, &n->full, lru) {
4144 validate_slab_slab(s, page, map);
4145 count++;
4146 }
4147 if (count != atomic_long_read(&n->nr_slabs))
4148 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4149 "counter=%ld\n", s->name, count,
4150 atomic_long_read(&n->nr_slabs));
4151
4152out:
4153 spin_unlock_irqrestore(&n->list_lock, flags);
4154 return count;
4155}
4156
4157static long validate_slab_cache(struct kmem_cache *s)
4158{
4159 int node;
4160 unsigned long count = 0;
4161 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4162 sizeof(unsigned long), GFP_KERNEL);
4163
4164 if (!map)
4165 return -ENOMEM;
4166
4167 flush_all(s);
4168 for_each_node_state(node, N_NORMAL_MEMORY) {
4169 struct kmem_cache_node *n = get_node(s, node);
4170
4171 count += validate_slab_node(s, n, map);
4172 }
4173 kfree(map);
4174 return count;
4175}
4176
4177
4178
4179
4180
/*
 * One entry of the tracking table built by add_location(): aggregated
 * statistics for all tracked objects sharing the same track address.
 */
struct location {
	unsigned long count;		/* number of tracks merged into this entry */
	unsigned long addr;		/* track->addr shared by those tracks */
	long long sum_time;		/* sum of the ages (jiffies - track->when) */
	long min_time;			/* smallest age seen */
	long max_time;			/* largest age seen */
	long min_pid;			/* smallest track->pid seen */
	long max_pid;			/* largest track->pid seen */
	DECLARE_BITMAP(cpus, NR_CPUS);	/* cpus recorded in the tracks */
	nodemask_t nodes;		/* nodes of the pages holding the tracks */
};
4192
/*
 * Growable table of struct location entries, kept sorted by address by
 * add_location().
 */
struct loc_track {
	unsigned long max;	/* capacity of loc[] in entries */
	unsigned long count;	/* entries currently in use */
	struct location *loc;	/* page allocator backed array */
};
4198
4199static void free_loc_track(struct loc_track *t)
4200{
4201 if (t->max)
4202 free_pages((unsigned long)t->loc,
4203 get_order(sizeof(struct location) * t->max));
4204}
4205
4206static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4207{
4208 struct location *l;
4209 int order;
4210
4211 order = get_order(sizeof(struct location) * max);
4212
4213 l = (void *)__get_free_pages(flags, order);
4214 if (!l)
4215 return 0;
4216
4217 if (t->count) {
4218 memcpy(l, t->loc, sizeof(struct location) * t->count);
4219 free_loc_track(t);
4220 }
4221 t->max = max;
4222 t->loc = l;
4223 return 1;
4224}
4225
4226static int add_location(struct loc_track *t, struct kmem_cache *s,
4227 const struct track *track)
4228{
4229 long start, end, pos;
4230 struct location *l;
4231 unsigned long caddr;
4232 unsigned long age = jiffies - track->when;
4233
4234 start = -1;
4235 end = t->count;
4236
4237 for ( ; ; ) {
4238 pos = start + (end - start + 1) / 2;
4239
4240
4241
4242
4243
4244 if (pos == end)
4245 break;
4246
4247 caddr = t->loc[pos].addr;
4248 if (track->addr == caddr) {
4249
4250 l = &t->loc[pos];
4251 l->count++;
4252 if (track->when) {
4253 l->sum_time += age;
4254 if (age < l->min_time)
4255 l->min_time = age;
4256 if (age > l->max_time)
4257 l->max_time = age;
4258
4259 if (track->pid < l->min_pid)
4260 l->min_pid = track->pid;
4261 if (track->pid > l->max_pid)
4262 l->max_pid = track->pid;
4263
4264 cpumask_set_cpu(track->cpu,
4265 to_cpumask(l->cpus));
4266 }
4267 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4268 return 1;
4269 }
4270
4271 if (track->addr < caddr)
4272 end = pos;
4273 else
4274 start = pos;
4275 }
4276
4277
4278
4279
4280 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4281 return 0;
4282
4283 l = t->loc + pos;
4284 if (pos < t->count)
4285 memmove(l + 1, l,
4286 (t->count - pos) * sizeof(struct location));
4287 t->count++;
4288 l->count = 1;
4289 l->addr = track->addr;
4290 l->sum_time = age;
4291 l->min_time = age;
4292 l->max_time = age;
4293 l->min_pid = track->pid;
4294 l->max_pid = track->pid;
4295 cpumask_clear(to_cpumask(l->cpus));
4296 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4297 nodes_clear(l->nodes);
4298 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4299 return 1;
4300}
4301
4302static void process_slab(struct loc_track *t, struct kmem_cache *s,
4303 struct page *page, enum track_item alloc,
4304 unsigned long *map)
4305{
4306 void *addr = page_address(page);
4307 void *p;
4308
4309 bitmap_zero(map, page->objects);
4310 get_map(s, page, map);
4311
4312 for_each_object(p, s, addr, page->objects)
4313 if (!test_bit(slab_index(p, s, addr), map))
4314 add_location(t, s, get_track(s, p, alloc));
4315}
4316
4317static int list_locations(struct kmem_cache *s, char *buf,
4318 enum track_item alloc)
4319{
4320 int len = 0;
4321 unsigned long i;
4322 struct loc_track t = { 0, 0, NULL };
4323 int node;
4324 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4325 sizeof(unsigned long), GFP_KERNEL);
4326
4327 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4328 GFP_TEMPORARY)) {
4329 kfree(map);
4330 return sprintf(buf, "Out of memory\n");
4331 }
4332
4333 flush_all(s);
4334
4335 for_each_node_state(node, N_NORMAL_MEMORY) {
4336 struct kmem_cache_node *n = get_node(s, node);
4337 unsigned long flags;
4338 struct page *page;
4339
4340 if (!atomic_long_read(&n->nr_slabs))
4341 continue;
4342
4343 spin_lock_irqsave(&n->list_lock, flags);
4344 list_for_each_entry(page, &n->partial, lru)
4345 process_slab(&t, s, page, alloc, map);
4346 list_for_each_entry(page, &n->full, lru)
4347 process_slab(&t, s, page, alloc, map);
4348 spin_unlock_irqrestore(&n->list_lock, flags);
4349 }
4350
4351 for (i = 0; i < t.count; i++) {
4352 struct location *l = &t.loc[i];
4353
4354 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4355 break;
4356 len += sprintf(buf + len, "%7ld ", l->count);
4357
4358 if (l->addr)
4359 len += sprintf(buf + len, "%pS", (void *)l->addr);
4360 else
4361 len += sprintf(buf + len, "<not-available>");
4362
4363 if (l->sum_time != l->min_time) {
4364 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4365 l->min_time,
4366 (long)div_u64(l->sum_time, l->count),
4367 l->max_time);
4368 } else
4369 len += sprintf(buf + len, " age=%ld",
4370 l->min_time);
4371
4372 if (l->min_pid != l->max_pid)
4373 len += sprintf(buf + len, " pid=%ld-%ld",
4374 l->min_pid, l->max_pid);
4375 else
4376 len += sprintf(buf + len, " pid=%ld",
4377 l->min_pid);
4378
4379 if (num_online_cpus() > 1 &&
4380 !cpumask_empty(to_cpumask(l->cpus)) &&
4381 len < PAGE_SIZE - 60) {
4382 len += sprintf(buf + len, " cpus=");
4383 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4384 to_cpumask(l->cpus));
4385 }
4386
4387 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4388 len < PAGE_SIZE - 60) {
4389 len += sprintf(buf + len, " nodes=");
4390 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4391 l->nodes);
4392 }
4393
4394 len += sprintf(buf + len, "\n");
4395 }
4396
4397 free_loc_track(&t);
4398 kfree(map);
4399 if (!t.count)
4400 len += sprintf(buf, "No data\n");
4401 return len;
4402}
4403#endif
4404
4405#ifdef SLUB_RESILIENCY_TEST
/*
 * Self test, only built when SLUB_RESILIENCY_TEST is defined.
 *
 * Deliberately corrupts memory around kmalloc objects, both while they
 * are allocated and after they were freed, and runs
 * validate_slab_cache() on the affected kmalloc cache after each step.
 * The out of bounds and use-after-free writes below are intentional.
 */
static void resiliency_test(void)
{
	u8 *p;

	/* The test indexes kmalloc_caches[4..9] directly. */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	/* Write one byte past the end of a 16 byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches[4]);

	/* Write one pointer size beyond the end of a 32 byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches[5]);
	/* Write at a pseudo random offset behind a 64 byte object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches[6]);

	printk(KERN_ERR "\nB. Corruption after free\n");
	/* Overwrite the first byte of a freed object. */
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[7]);

	/* Overwrite a byte in the middle of a freed object. */
	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
	validate_slab_cache(kmalloc_caches[8]);

	/* Write one byte past the end of a freed object. */
	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[9]);
}
4461#else
4462#ifdef CONFIG_SYSFS
/* No-op stand-in used when SLUB_RESILIENCY_TEST is not defined. */
static void resiliency_test(void) {}
4464#endif
4465#endif
4466
4467#ifdef CONFIG_SYSFS
/*
 * Selectors for show_slab_objects().  The enum values are bit numbers;
 * the SO_* macros below are the corresponding masks passed in its
 * flags argument.
 */
enum slab_stat_type {
	SL_ALL,			/* all slabs of each node */
	SL_PARTIAL,		/* partial slabs of each node */
	SL_CPU,			/* slabs held per cpu */
	SL_OBJECTS,		/* count objects in use instead of slabs */
	SL_TOTAL		/* count total objects instead of slabs */
};

#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
4481
4482static ssize_t show_slab_objects(struct kmem_cache *s,
4483 char *buf, unsigned long flags)
4484{
4485 unsigned long total = 0;
4486 int node;
4487 int x;
4488 unsigned long *nodes;
4489 unsigned long *per_cpu;
4490
4491 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4492 if (!nodes)
4493 return -ENOMEM;
4494 per_cpu = nodes + nr_node_ids;
4495
4496 if (flags & SO_CPU) {
4497 int cpu;
4498
4499 for_each_possible_cpu(cpu) {
4500 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4501 int node;
4502 struct page *page;
4503
4504 page = ACCESS_ONCE(c->page);
4505 if (!page)
4506 continue;
4507
4508 node = page_to_nid(page);
4509 if (flags & SO_TOTAL)
4510 x = page->objects;
4511 else if (flags & SO_OBJECTS)
4512 x = page->inuse;
4513 else
4514 x = 1;
4515
4516 total += x;
4517 nodes[node] += x;
4518
4519 page = ACCESS_ONCE(c->partial);
4520 if (page) {
4521 x = page->pobjects;
4522 total += x;
4523 nodes[node] += x;
4524 }
4525
4526 per_cpu[node]++;
4527 }
4528 }
4529
4530 lock_memory_hotplug();
4531#ifdef CONFIG_SLUB_DEBUG
4532 if (flags & SO_ALL) {
4533 for_each_node_state(node, N_NORMAL_MEMORY) {
4534 struct kmem_cache_node *n = get_node(s, node);
4535
4536 if (flags & SO_TOTAL)
4537 x = atomic_long_read(&n->total_objects);
4538 else if (flags & SO_OBJECTS)
4539 x = atomic_long_read(&n->total_objects) -
4540 count_partial(n, count_free);
4541
4542 else
4543 x = atomic_long_read(&n->nr_slabs);
4544 total += x;
4545 nodes[node] += x;
4546 }
4547
4548 } else
4549#endif
4550 if (flags & SO_PARTIAL) {
4551 for_each_node_state(node, N_NORMAL_MEMORY) {
4552 struct kmem_cache_node *n = get_node(s, node);
4553
4554 if (flags & SO_TOTAL)
4555 x = count_partial(n, count_total);
4556 else if (flags & SO_OBJECTS)
4557 x = count_partial(n, count_inuse);
4558 else
4559 x = n->nr_partial;
4560 total += x;
4561 nodes[node] += x;
4562 }
4563 }
4564 x = sprintf(buf, "%lu", total);
4565#ifdef CONFIG_NUMA
4566 for_each_node_state(node, N_NORMAL_MEMORY)
4567 if (nodes[node])
4568 x += sprintf(buf + x, " N%d=%lu",
4569 node, nodes[node]);
4570#endif
4571 unlock_memory_hotplug();
4572 kfree(nodes);
4573 return x + sprintf(buf + x, "\n");
4574}
4575
4576#ifdef CONFIG_SLUB_DEBUG
4577static int any_slab_objects(struct kmem_cache *s)
4578{
4579 int node;
4580
4581 for_each_online_node(node) {
4582 struct kmem_cache_node *n = get_node(s, node);
4583
4584 if (!n)
4585 continue;
4586
4587 if (atomic_long_read(&n->total_objects))
4588 return 1;
4589 }
4590 return 0;
4591}
4592#endif
4593
/* Map an embedded struct attribute back to its enclosing struct slab_attribute. */
#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
/* Map an embedded kobject back to its enclosing struct kmem_cache. */
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4596
/*
 * A sysfs attribute of a slab cache. slab_attr_show() and slab_attr_store()
 * dispatch to these callbacks and return -EIO when the one they need is NULL.
 */
struct slab_attribute {
	struct attribute attr;	/* generic sysfs attribute; see to_slab_attr() */
	/* Format the value into buf; returns bytes written or a negative errno. */
	ssize_t (*show)(struct kmem_cache *s, char *buf);
	/* Parse count bytes from x; returns bytes consumed or a negative errno. */
	ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};
4602
/*
 * Define <name>_attr as a read-only attribute (mode 0400) backed by
 * <name>_show().
 */
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0400, _name##_show, NULL)

/*
 * Define <name>_attr as a read-write attribute (mode 0600) backed by
 * <name>_show() and <name>_store().
 */
#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0600, _name##_show, _name##_store)
4610
4611static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4612{
4613 return sprintf(buf, "%d\n", s->size);
4614}
4615SLAB_ATTR_RO(slab_size);
4616
4617static ssize_t align_show(struct kmem_cache *s, char *buf)
4618{
4619 return sprintf(buf, "%d\n", s->align);
4620}
4621SLAB_ATTR_RO(align);
4622
4623static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4624{
4625 return sprintf(buf, "%d\n", s->object_size);
4626}
4627SLAB_ATTR_RO(object_size);
4628
4629static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4630{
4631 return sprintf(buf, "%d\n", oo_objects(s->oo));
4632}
4633SLAB_ATTR_RO(objs_per_slab);
4634
4635static ssize_t order_store(struct kmem_cache *s,
4636 const char *buf, size_t length)
4637{
4638 unsigned long order;
4639 int err;
4640
4641 err = strict_strtoul(buf, 10, &order);
4642 if (err)
4643 return err;
4644
4645 if (order > slub_max_order || order < slub_min_order)
4646 return -EINVAL;
4647
4648 calculate_sizes(s, order);
4649 return length;
4650}
4651
4652static ssize_t order_show(struct kmem_cache *s, char *buf)
4653{
4654 return sprintf(buf, "%d\n", oo_order(s->oo));
4655}
4656SLAB_ATTR(order);
4657
4658static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4659{
4660 return sprintf(buf, "%lu\n", s->min_partial);
4661}
4662
4663static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4664 size_t length)
4665{
4666 unsigned long min;
4667 int err;
4668
4669 err = strict_strtoul(buf, 10, &min);
4670 if (err)
4671 return err;
4672
4673 set_min_partial(s, min);
4674 return length;
4675}
4676SLAB_ATTR(min_partial);
4677
4678static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4679{
4680 return sprintf(buf, "%u\n", s->cpu_partial);
4681}
4682
4683static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4684 size_t length)
4685{
4686 unsigned long objects;
4687 int err;
4688
4689 err = strict_strtoul(buf, 10, &objects);
4690 if (err)
4691 return err;
4692 if (objects && kmem_cache_debug(s))
4693 return -EINVAL;
4694
4695 s->cpu_partial = objects;
4696 flush_all(s);
4697 return length;
4698}
4699SLAB_ATTR(cpu_partial);
4700
4701static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4702{
4703 if (!s->ctor)
4704 return 0;
4705 return sprintf(buf, "%pS\n", s->ctor);
4706}
4707SLAB_ATTR_RO(ctor);
4708
4709static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4710{
4711 return sprintf(buf, "%d\n", s->refcount - 1);
4712}
4713SLAB_ATTR_RO(aliases);
4714
4715static ssize_t partial_show(struct kmem_cache *s, char *buf)
4716{
4717 return show_slab_objects(s, buf, SO_PARTIAL);
4718}
4719SLAB_ATTR_RO(partial);
4720
4721static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4722{
4723 return show_slab_objects(s, buf, SO_CPU);
4724}
4725SLAB_ATTR_RO(cpu_slabs);
4726
4727static ssize_t objects_show(struct kmem_cache *s, char *buf)
4728{
4729 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4730}
4731SLAB_ATTR_RO(objects);
4732
4733static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4734{
4735 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4736}
4737SLAB_ATTR_RO(objects_partial);
4738
4739static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4740{
4741 int objects = 0;
4742 int pages = 0;
4743 int cpu;
4744 int len;
4745
4746 for_each_online_cpu(cpu) {
4747 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4748
4749 if (page) {
4750 pages += page->pages;
4751 objects += page->pobjects;
4752 }
4753 }
4754
4755 len = sprintf(buf, "%d(%d)", objects, pages);
4756
4757#ifdef CONFIG_SMP
4758 for_each_online_cpu(cpu) {
4759 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4760
4761 if (page && len < PAGE_SIZE - 20)
4762 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4763 page->pobjects, page->pages);
4764 }
4765#endif
4766 return len + sprintf(buf + len, "\n");
4767}
4768SLAB_ATTR_RO(slabs_cpu_partial);
4769
4770static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4771{
4772 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4773}
4774
4775static ssize_t reclaim_account_store(struct kmem_cache *s,
4776 const char *buf, size_t length)
4777{
4778 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4779 if (buf[0] == '1')
4780 s->flags |= SLAB_RECLAIM_ACCOUNT;
4781 return length;
4782}
4783SLAB_ATTR(reclaim_account);
4784
4785static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4786{
4787 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4788}
4789SLAB_ATTR_RO(hwcache_align);
4790
4791#ifdef CONFIG_ZONE_DMA
4792static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4793{
4794 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4795}
4796SLAB_ATTR_RO(cache_dma);
4797#endif
4798
4799static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4800{
4801 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4802}
4803SLAB_ATTR_RO(destroy_by_rcu);
4804
4805static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4806{
4807 return sprintf(buf, "%d\n", s->reserved);
4808}
4809SLAB_ATTR_RO(reserved);
4810
4811#ifdef CONFIG_SLUB_DEBUG
4812static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4813{
4814 return show_slab_objects(s, buf, SO_ALL);
4815}
4816SLAB_ATTR_RO(slabs);
4817
4818static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4819{
4820 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4821}
4822SLAB_ATTR_RO(total_objects);
4823
4824static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4825{
4826 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4827}
4828
4829static ssize_t sanity_checks_store(struct kmem_cache *s,
4830 const char *buf, size_t length)
4831{
4832 s->flags &= ~SLAB_DEBUG_FREE;
4833 if (buf[0] == '1') {
4834 s->flags &= ~__CMPXCHG_DOUBLE;
4835 s->flags |= SLAB_DEBUG_FREE;
4836 }
4837 return length;
4838}
4839SLAB_ATTR(sanity_checks);
4840
4841static ssize_t trace_show(struct kmem_cache *s, char *buf)
4842{
4843 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4844}
4845
4846static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4847 size_t length)
4848{
4849 s->flags &= ~SLAB_TRACE;
4850 if (buf[0] == '1') {
4851 s->flags &= ~__CMPXCHG_DOUBLE;
4852 s->flags |= SLAB_TRACE;
4853 }
4854 return length;
4855}
4856SLAB_ATTR(trace);
4857
4858static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4859{
4860 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4861}
4862
4863static ssize_t red_zone_store(struct kmem_cache *s,
4864 const char *buf, size_t length)
4865{
4866 if (any_slab_objects(s))
4867 return -EBUSY;
4868
4869 s->flags &= ~SLAB_RED_ZONE;
4870 if (buf[0] == '1') {
4871 s->flags &= ~__CMPXCHG_DOUBLE;
4872 s->flags |= SLAB_RED_ZONE;
4873 }
4874 calculate_sizes(s, -1);
4875 return length;
4876}
4877SLAB_ATTR(red_zone);
4878
4879static ssize_t poison_show(struct kmem_cache *s, char *buf)
4880{
4881 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4882}
4883
4884static ssize_t poison_store(struct kmem_cache *s,
4885 const char *buf, size_t length)
4886{
4887 if (any_slab_objects(s))
4888 return -EBUSY;
4889
4890 s->flags &= ~SLAB_POISON;
4891 if (buf[0] == '1') {
4892 s->flags &= ~__CMPXCHG_DOUBLE;
4893 s->flags |= SLAB_POISON;
4894 }
4895 calculate_sizes(s, -1);
4896 return length;
4897}
4898SLAB_ATTR(poison);
4899
4900static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4901{
4902 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4903}
4904
4905static ssize_t store_user_store(struct kmem_cache *s,
4906 const char *buf, size_t length)
4907{
4908 if (any_slab_objects(s))
4909 return -EBUSY;
4910
4911 s->flags &= ~SLAB_STORE_USER;
4912 if (buf[0] == '1') {
4913 s->flags &= ~__CMPXCHG_DOUBLE;
4914 s->flags |= SLAB_STORE_USER;
4915 }
4916 calculate_sizes(s, -1);
4917 return length;
4918}
4919SLAB_ATTR(store_user);
4920
/* sysfs "validate" (read): produces no output. */
static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

/*
 * sysfs "validate" (write): input starting with '1' runs
 * validate_slab_cache(s); a non-negative result is reported as @length, a
 * negative one is returned as is. Any other input yields -EINVAL.
 */
static ssize_t validate_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	int rc;

	if (buf[0] != '1')
		return -EINVAL;

	rc = validate_slab_cache(s);
	if (rc >= 0)
		rc = length;
	return rc;
}
SLAB_ATTR(validate);
4939
4940static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4941{
4942 if (!(s->flags & SLAB_STORE_USER))
4943 return -ENOSYS;
4944 return list_locations(s, buf, TRACK_ALLOC);
4945}
4946SLAB_ATTR_RO(alloc_calls);
4947
4948static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4949{
4950 if (!(s->flags & SLAB_STORE_USER))
4951 return -ENOSYS;
4952 return list_locations(s, buf, TRACK_FREE);
4953}
4954SLAB_ATTR_RO(free_calls);
4955#endif
4956
4957#ifdef CONFIG_FAILSLAB
4958static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4959{
4960 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4961}
4962
4963static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4964 size_t length)
4965{
4966 s->flags &= ~SLAB_FAILSLAB;
4967 if (buf[0] == '1')
4968 s->flags |= SLAB_FAILSLAB;
4969 return length;
4970}
4971SLAB_ATTR(failslab);
4972#endif
4973
/* sysfs "shrink" (read): produces no output. */
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

/*
 * sysfs "shrink" (write): input starting with '1' calls
 * kmem_cache_shrink(s) and returns its non-zero result or @length.
 * Any other input yields -EINVAL.
 */
static ssize_t shrink_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	int rc;

	if (buf[0] != '1')
		return -EINVAL;

	rc = kmem_cache_shrink(s);
	if (rc)
		return rc;

	return length;
}
SLAB_ATTR(shrink);
4992
4993#ifdef CONFIG_NUMA
4994static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4995{
4996 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4997}
4998
4999static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5000 const char *buf, size_t length)
5001{
5002 unsigned long ratio;
5003 int err;
5004
5005 err = strict_strtoul(buf, 10, &ratio);
5006 if (err)
5007 return err;
5008
5009 if (ratio <= 100)
5010 s->remote_node_defrag_ratio = ratio * 10;
5011
5012 return length;
5013}
5014SLAB_ATTR(remote_node_defrag_ratio);
5015#endif
5016
5017#ifdef CONFIG_SLUB_STATS
5018static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5019{
5020 unsigned long sum = 0;
5021 int cpu;
5022 int len;
5023 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
5024
5025 if (!data)
5026 return -ENOMEM;
5027
5028 for_each_online_cpu(cpu) {
5029 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5030
5031 data[cpu] = x;
5032 sum += x;
5033 }
5034
5035 len = sprintf(buf, "%lu", sum);
5036
5037#ifdef CONFIG_SMP
5038 for_each_online_cpu(cpu) {
5039 if (data[cpu] && len < PAGE_SIZE - 20)
5040 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5041 }
5042#endif
5043 kfree(data);
5044 return len + sprintf(buf + len, "\n");
5045}
5046
5047static void clear_stat(struct kmem_cache *s, enum stat_item si)
5048{
5049 int cpu;
5050
5051 for_each_online_cpu(cpu)
5052 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5053}
5054
/*
 * Define a read-write sysfs attribute named <text> for statistic <si>:
 * reading calls show_stat(); writing accepts only input starting with '0',
 * which clears the statistic via clear_stat(), and returns -EINVAL for
 * anything else.
 */
#define STAT_ATTR(si, text) 					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);						\

/* One attribute per statistic; each is also listed in slab_attrs[]. */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5096#endif
5097
/*
 * NULL-terminated list of the attributes exposed for every cache. It is
 * installed through slab_attr_group by sysfs_slab_add(). Entries guarded by
 * a CONFIG_* option exist only when the matching attribute is compiled in.
 */
static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif

	NULL
};

/* Attribute group wrapping slab_attrs[] for sysfs_create_group(). */
static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};
5174
5175static ssize_t slab_attr_show(struct kobject *kobj,
5176 struct attribute *attr,
5177 char *buf)
5178{
5179 struct slab_attribute *attribute;
5180 struct kmem_cache *s;
5181 int err;
5182
5183 attribute = to_slab_attr(attr);
5184 s = to_slab(kobj);
5185
5186 if (!attribute->show)
5187 return -EIO;
5188
5189 err = attribute->show(s, buf);
5190
5191 return err;
5192}
5193
5194static ssize_t slab_attr_store(struct kobject *kobj,
5195 struct attribute *attr,
5196 const char *buf, size_t len)
5197{
5198 struct slab_attribute *attribute;
5199 struct kmem_cache *s;
5200 int err;
5201
5202 attribute = to_slab_attr(attr);
5203 s = to_slab(kobj);
5204
5205 if (!attribute->store)
5206 return -EIO;
5207
5208 err = attribute->store(s, buf, len);
5209
5210 return err;
5211}
5212
5213static void kmem_cache_release(struct kobject *kobj)
5214{
5215 struct kmem_cache *s = to_slab(kobj);
5216
5217 kfree(s->name);
5218 kfree(s);
5219}
5220
/* Route sysfs reads and writes on a cache kobject to the slab_attribute callbacks. */
static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

/*
 * kobj_type of every cache kobject; dropping the last reference frees the
 * cache through kmem_cache_release().
 */
static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release
};
5230
5231static int uevent_filter(struct kset *kset, struct kobject *kobj)
5232{
5233 struct kobj_type *ktype = get_ktype(kobj);
5234
5235 if (ktype == &slab_ktype)
5236 return 1;
5237 return 0;
5238}
5239
/* uevent operations handed to kset_create_and_add() for the "slab" kset. */
static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

/* The "slab" kset; created in slab_sysfs_init() and parent of all cache kobjects. */
static struct kset *slab_kset;
5245
5246#define ID_STR_LENGTH 64
5247
5248
5249
5250
5251
5252static char *create_unique_id(struct kmem_cache *s)
5253{
5254 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5255 char *p = name;
5256
5257 BUG_ON(!name);
5258
5259 *p++ = ':';
5260
5261
5262
5263
5264
5265
5266
5267 if (s->flags & SLAB_CACHE_DMA)
5268 *p++ = 'd';
5269 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5270 *p++ = 'a';
5271 if (s->flags & SLAB_DEBUG_FREE)
5272 *p++ = 'F';
5273 if (!(s->flags & SLAB_NOTRACK))
5274 *p++ = 't';
5275 if (p != name + 1)
5276 *p++ = '-';
5277 p += sprintf(p, "%07d", s->size);
5278 BUG_ON(p > name + ID_STR_LENGTH - 1);
5279 return name;
5280}
5281
5282static int sysfs_slab_add(struct kmem_cache *s)
5283{
5284 int err;
5285 const char *name;
5286 int unmergeable;
5287
5288 if (slab_state < FULL)
5289
5290 return 0;
5291
5292 unmergeable = slab_unmergeable(s);
5293 if (unmergeable) {
5294
5295
5296
5297
5298
5299 sysfs_remove_link(&slab_kset->kobj, s->name);
5300 name = s->name;
5301 } else {
5302
5303
5304
5305
5306 name = create_unique_id(s);
5307 }
5308
5309 s->kobj.kset = slab_kset;
5310 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5311 if (err) {
5312 kobject_put(&s->kobj);
5313 return err;
5314 }
5315
5316 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5317 if (err) {
5318 kobject_del(&s->kobj);
5319 kobject_put(&s->kobj);
5320 return err;
5321 }
5322 kobject_uevent(&s->kobj, KOBJ_ADD);
5323 if (!unmergeable) {
5324
5325 sysfs_slab_alias(s, s->name);
5326 kfree(name);
5327 }
5328 return 0;
5329}
5330
5331static void sysfs_slab_remove(struct kmem_cache *s)
5332{
5333 if (slab_state < FULL)
5334
5335
5336
5337
5338 return;
5339
5340 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5341 kobject_del(&s->kobj);
5342 kobject_put(&s->kobj);
5343}
5344
5345
5346
5347
5348
/*
 * An alias requested through sysfs_slab_alias() while slab_state was not yet
 * FULL. slab_sysfs_init() replays and frees these entries.
 */
struct saved_alias {
	struct kmem_cache *s;		/* cache the alias points to */
	const char *name;		/* alias name; pointer stored, not copied */
	struct saved_alias *next;	/* next entry in alias_list */
};

/* Singly linked list of pending aliases; new entries are pushed at the head. */
static struct saved_alias *alias_list;
5356
5357static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5358{
5359 struct saved_alias *al;
5360
5361 if (slab_state == FULL) {
5362
5363
5364
5365 sysfs_remove_link(&slab_kset->kobj, name);
5366 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5367 }
5368
5369 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5370 if (!al)
5371 return -ENOMEM;
5372
5373 al->s = s;
5374 al->name = name;
5375 al->next = alias_list;
5376 alias_list = al;
5377 return 0;
5378}
5379
5380static int __init slab_sysfs_init(void)
5381{
5382 struct kmem_cache *s;
5383 int err;
5384
5385 mutex_lock(&slab_mutex);
5386
5387 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5388 if (!slab_kset) {
5389 mutex_unlock(&slab_mutex);
5390 printk(KERN_ERR "Cannot register slab subsystem.\n");
5391 return -ENOSYS;
5392 }
5393
5394 slab_state = FULL;
5395
5396 list_for_each_entry(s, &slab_caches, list) {
5397 err = sysfs_slab_add(s);
5398 if (err)
5399 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5400 " to sysfs\n", s->name);
5401 }
5402
5403 while (alias_list) {
5404 struct saved_alias *al = alias_list;
5405
5406 alias_list = alias_list->next;
5407 err = sysfs_slab_alias(al->s, al->name);
5408 if (err)
5409 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5410 " %s to sysfs\n", al->name);
5411 kfree(al);
5412 }
5413
5414 mutex_unlock(&slab_mutex);
5415 resiliency_test();
5416 return 0;
5417}
5418
5419__initcall(slab_sysfs_init);
5420#endif
5421
5422
5423
5424
5425#ifdef CONFIG_SLABINFO
/*
 * Emit the two header lines of the slabinfo listing: the format version and
 * the column legend. Called by s_start() when reading begins at offset 0.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	seq_puts(m, "slabinfo - version: 2.1\n");
	seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
	seq_putc(m, '\n');
}
5435
5436static void *s_start(struct seq_file *m, loff_t *pos)
5437{
5438 loff_t n = *pos;
5439
5440 mutex_lock(&slab_mutex);
5441 if (!n)
5442 print_slabinfo_header(m);
5443
5444 return seq_list_start(&slab_caches, *pos);
5445}
5446
5447static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5448{
5449 return seq_list_next(p, &slab_caches, pos);
5450}
5451
5452static void s_stop(struct seq_file *m, void *p)
5453{
5454 mutex_unlock(&slab_mutex);
5455}
5456
5457static int s_show(struct seq_file *m, void *p)
5458{
5459 unsigned long nr_partials = 0;
5460 unsigned long nr_slabs = 0;
5461 unsigned long nr_inuse = 0;
5462 unsigned long nr_objs = 0;
5463 unsigned long nr_free = 0;
5464 struct kmem_cache *s;
5465 int node;
5466
5467 s = list_entry(p, struct kmem_cache, list);
5468
5469 for_each_online_node(node) {
5470 struct kmem_cache_node *n = get_node(s, node);
5471
5472 if (!n)
5473 continue;
5474
5475 nr_partials += n->nr_partial;
5476 nr_slabs += atomic_long_read(&n->nr_slabs);
5477 nr_objs += atomic_long_read(&n->total_objects);
5478 nr_free += count_partial(n, count_free);
5479 }
5480
5481 nr_inuse = nr_objs - nr_free;
5482
5483 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
5484 nr_objs, s->size, oo_objects(s->oo),
5485 (1 << oo_order(s->oo)));
5486 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
5487 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
5488 0UL);
5489 seq_putc(m, '\n');
5490 return 0;
5491}
5492
/* seq_file iterator over slab_caches; handed to seq_open() by slabinfo_open(). */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
5499
5500static int slabinfo_open(struct inode *inode, struct file *file)
5501{
5502 return seq_open(file, &slabinfo_op);
5503}
5504
/* File operations of the "slabinfo" proc entry registered by slab_proc_init(). */
static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
5511
5512static int __init slab_proc_init(void)
5513{
5514 proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
5515 return 0;
5516}
5517module_init(slab_proc_init);
5518#endif
5519