1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/module.h>
17#include <linux/profile.h>
18#include <linux/bootmem.h>
19#include <linux/notifier.h>
20#include <linux/mm.h>
21#include <linux/cpumask.h>
22#include <linux/cpu.h>
23#include <linux/highmem.h>
24#include <linux/mutex.h>
25#include <linux/slab.h>
26#include <linux/vmalloc.h>
27#include <asm/sections.h>
28#include <asm/irq_regs.h>
29#include <asm/ptrace.h>
30
31struct profile_hit {
32 u32 pc, hits;
33};
/* log2 of the number of adjacent slots probed together as one group */
#define PROFILE_GRPSHIFT 3
/* number of slots in one probe group */
#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
/* number of profile_hit slots that fit in one page-sized hit table */
#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
/* number of probe groups in one hit table */
#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
38
39
40static int (*timer_hook)(struct pt_regs *) __read_mostly;
41
42static atomic_t *prof_buffer;
43static unsigned long prof_len, prof_shift;
44
45int prof_on __read_mostly;
46EXPORT_SYMBOL_GPL(prof_on);
47
48static cpumask_var_t prof_cpu_mask;
49#ifdef CONFIG_SMP
50static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
51static DEFINE_PER_CPU(int, cpu_profile_flip);
52static DEFINE_MUTEX(profile_flip_mutex);
53#endif
54
55int profile_setup(char *str)
56{
57 static char schedstr[] = "schedule";
58 static char sleepstr[] = "sleep";
59 static char kvmstr[] = "kvm";
60 int par;
61
62 if (!strncmp(str, sleepstr, strlen(sleepstr))) {
63#ifdef CONFIG_SCHEDSTATS
64 prof_on = SLEEP_PROFILING;
65 if (str[strlen(sleepstr)] == ',')
66 str += strlen(sleepstr) + 1;
67 if (get_option(&str, &par))
68 prof_shift = par;
69 printk(KERN_INFO
70 "kernel sleep profiling enabled (shift: %ld)\n",
71 prof_shift);
72#else
73 printk(KERN_WARNING
74 "kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
75#endif
76 } else if (!strncmp(str, schedstr, strlen(schedstr))) {
77 prof_on = SCHED_PROFILING;
78 if (str[strlen(schedstr)] == ',')
79 str += strlen(schedstr) + 1;
80 if (get_option(&str, &par))
81 prof_shift = par;
82 printk(KERN_INFO
83 "kernel schedule profiling enabled (shift: %ld)\n",
84 prof_shift);
85 } else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
86 prof_on = KVM_PROFILING;
87 if (str[strlen(kvmstr)] == ',')
88 str += strlen(kvmstr) + 1;
89 if (get_option(&str, &par))
90 prof_shift = par;
91 printk(KERN_INFO
92 "kernel KVM profiling enabled (shift: %ld)\n",
93 prof_shift);
94 } else if (get_option(&str, &par)) {
95 prof_shift = par;
96 prof_on = CPU_PROFILING;
97 printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
98 prof_shift);
99 }
100 return 1;
101}
102__setup("profile=", profile_setup);
103
104
105int __ref profile_init(void)
106{
107 int buffer_bytes;
108 if (!prof_on)
109 return 0;
110
111
112 prof_len = (_etext - _stext) >> prof_shift;
113 buffer_bytes = prof_len*sizeof(atomic_t);
114 if (!slab_is_available()) {
115 prof_buffer = alloc_bootmem(buffer_bytes);
116 alloc_bootmem_cpumask_var(&prof_cpu_mask);
117 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
118 return 0;
119 }
120
121 if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
122 return -ENOMEM;
123
124 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
125
126 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
127 if (prof_buffer)
128 return 0;
129
130 prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO);
131 if (prof_buffer)
132 return 0;
133
134 prof_buffer = vmalloc(buffer_bytes);
135 if (prof_buffer)
136 return 0;
137
138 free_cpumask_var(prof_cpu_mask);
139 return -ENOMEM;
140}
141
142
143
/* Chain run by profile_task_exit(). */
static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
/* Chain run by profile_handoff_task(); an atomic (non-blocking) chain. */
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
/* Chain run by profile_munmap(). */
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
147
148void profile_task_exit(struct task_struct *task)
149{
150 blocking_notifier_call_chain(&task_exit_notifier, 0, task);
151}
152
153int profile_handoff_task(struct task_struct *task)
154{
155 int ret;
156 ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
157 return (ret == NOTIFY_OK) ? 1 : 0;
158}
159
160void profile_munmap(unsigned long addr)
161{
162 blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
163}
164
165int task_handoff_register(struct notifier_block *n)
166{
167 return atomic_notifier_chain_register(&task_free_notifier, n);
168}
169EXPORT_SYMBOL_GPL(task_handoff_register);
170
171int task_handoff_unregister(struct notifier_block *n)
172{
173 return atomic_notifier_chain_unregister(&task_free_notifier, n);
174}
175EXPORT_SYMBOL_GPL(task_handoff_unregister);
176
177int profile_event_register(enum profile_type type, struct notifier_block *n)
178{
179 int err = -EINVAL;
180
181 switch (type) {
182 case PROFILE_TASK_EXIT:
183 err = blocking_notifier_chain_register(
184 &task_exit_notifier, n);
185 break;
186 case PROFILE_MUNMAP:
187 err = blocking_notifier_chain_register(
188 &munmap_notifier, n);
189 break;
190 }
191
192 return err;
193}
194EXPORT_SYMBOL_GPL(profile_event_register);
195
196int profile_event_unregister(enum profile_type type, struct notifier_block *n)
197{
198 int err = -EINVAL;
199
200 switch (type) {
201 case PROFILE_TASK_EXIT:
202 err = blocking_notifier_chain_unregister(
203 &task_exit_notifier, n);
204 break;
205 case PROFILE_MUNMAP:
206 err = blocking_notifier_chain_unregister(
207 &munmap_notifier, n);
208 break;
209 }
210
211 return err;
212}
213EXPORT_SYMBOL_GPL(profile_event_unregister);
214
215int register_timer_hook(int (*hook)(struct pt_regs *))
216{
217 if (timer_hook)
218 return -EBUSY;
219 timer_hook = hook;
220 return 0;
221}
222EXPORT_SYMBOL_GPL(register_timer_hook);
223
224void unregister_timer_hook(int (*hook)(struct pt_regs *))
225{
226 WARN_ON(hook != timer_hook);
227 timer_hook = NULL;
228
229 synchronize_sched();
230}
231EXPORT_SYMBOL_GPL(unregister_timer_hook);
232
233
234#ifdef CONFIG_SMP
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266static void __profile_flip_buffers(void *unused)
267{
268 int cpu = smp_processor_id();
269
270 per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
271}
272
273static void profile_flip_buffers(void)
274{
275 int i, j, cpu;
276
277 mutex_lock(&profile_flip_mutex);
278 j = per_cpu(cpu_profile_flip, get_cpu());
279 put_cpu();
280 on_each_cpu(__profile_flip_buffers, NULL, 1);
281 for_each_online_cpu(cpu) {
282 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
283 for (i = 0; i < NR_PROFILE_HIT; ++i) {
284 if (!hits[i].hits) {
285 if (hits[i].pc)
286 hits[i].pc = 0;
287 continue;
288 }
289 atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
290 hits[i].hits = hits[i].pc = 0;
291 }
292 }
293 mutex_unlock(&profile_flip_mutex);
294}
295
296static void profile_discard_flip_buffers(void)
297{
298 int i, cpu;
299
300 mutex_lock(&profile_flip_mutex);
301 i = per_cpu(cpu_profile_flip, get_cpu());
302 put_cpu();
303 on_each_cpu(__profile_flip_buffers, NULL, 1);
304 for_each_online_cpu(cpu) {
305 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
306 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
307 }
308 mutex_unlock(&profile_flip_mutex);
309}
310
311void profile_hits(int type, void *__pc, unsigned int nr_hits)
312{
313 unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
314 int i, j, cpu;
315 struct profile_hit *hits;
316
317 if (prof_on != type || !prof_buffer)
318 return;
319 pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
320 i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
321 secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
322 cpu = get_cpu();
323 hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
324 if (!hits) {
325 put_cpu();
326 return;
327 }
328
329
330
331
332
333 local_irq_save(flags);
334 do {
335 for (j = 0; j < PROFILE_GRPSZ; ++j) {
336 if (hits[i + j].pc == pc) {
337 hits[i + j].hits += nr_hits;
338 goto out;
339 } else if (!hits[i + j].hits) {
340 hits[i + j].pc = pc;
341 hits[i + j].hits = nr_hits;
342 goto out;
343 }
344 }
345 i = (i + secondary) & (NR_PROFILE_HIT - 1);
346 } while (i != primary);
347
348
349
350
351
352 atomic_add(nr_hits, &prof_buffer[pc]);
353 for (i = 0; i < NR_PROFILE_HIT; ++i) {
354 atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
355 hits[i].pc = hits[i].hits = 0;
356 }
357out:
358 local_irq_restore(flags);
359 put_cpu();
360}
361
362static int __cpuinit profile_cpu_callback(struct notifier_block *info,
363 unsigned long action, void *__cpu)
364{
365 int node, cpu = (unsigned long)__cpu;
366 struct page *page;
367
368 switch (action) {
369 case CPU_UP_PREPARE:
370 case CPU_UP_PREPARE_FROZEN:
371 node = cpu_to_node(cpu);
372 per_cpu(cpu_profile_flip, cpu) = 0;
373 if (!per_cpu(cpu_profile_hits, cpu)[1]) {
374 page = alloc_pages_node(node,
375 GFP_KERNEL | __GFP_ZERO,
376 0);
377 if (!page)
378 return NOTIFY_BAD;
379 per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
380 }
381 if (!per_cpu(cpu_profile_hits, cpu)[0]) {
382 page = alloc_pages_node(node,
383 GFP_KERNEL | __GFP_ZERO,
384 0);
385 if (!page)
386 goto out_free;
387 per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
388 }
389 break;
390out_free:
391 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
392 per_cpu(cpu_profile_hits, cpu)[1] = NULL;
393 __free_page(page);
394 return NOTIFY_BAD;
395 case CPU_ONLINE:
396 case CPU_ONLINE_FROZEN:
397 if (prof_cpu_mask != NULL)
398 cpumask_set_cpu(cpu, prof_cpu_mask);
399 break;
400 case CPU_UP_CANCELED:
401 case CPU_UP_CANCELED_FROZEN:
402 case CPU_DEAD:
403 case CPU_DEAD_FROZEN:
404 if (prof_cpu_mask != NULL)
405 cpumask_clear_cpu(cpu, prof_cpu_mask);
406 if (per_cpu(cpu_profile_hits, cpu)[0]) {
407 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
408 per_cpu(cpu_profile_hits, cpu)[0] = NULL;
409 __free_page(page);
410 }
411 if (per_cpu(cpu_profile_hits, cpu)[1]) {
412 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
413 per_cpu(cpu_profile_hits, cpu)[1] = NULL;
414 __free_page(page);
415 }
416 break;
417 }
418 return NOTIFY_OK;
419}
420#else
/* !CONFIG_SMP: no per-CPU hit tables exist, so these are no-ops. */
#define profile_flip_buffers() do { } while (0)
#define profile_discard_flip_buffers() do { } while (0)
/* !CONFIG_SMP: no hotplug callback is provided. */
#define profile_cpu_callback NULL
424
425void profile_hits(int type, void *__pc, unsigned int nr_hits)
426{
427 unsigned long pc;
428
429 if (prof_on != type || !prof_buffer)
430 return;
431 pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
432 atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
433}
434#endif
435EXPORT_SYMBOL_GPL(profile_hits);
436
437void profile_tick(int type)
438{
439 struct pt_regs *regs = get_irq_regs();
440
441 if (type == CPU_PROFILING && timer_hook)
442 timer_hook(regs);
443 if (!user_mode(regs) && prof_cpu_mask != NULL &&
444 cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
445 profile_hit(type, (void *)profile_pc(regs));
446}
447
448#ifdef CONFIG_PROC_FS
449#include <linux/proc_fs.h>
450#include <asm/uaccess.h>
451
452static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
453 int count, int *eof, void *data)
454{
455 int len = cpumask_scnprintf(page, count, data);
456 if (count - len < 2)
457 return -EINVAL;
458 len += sprintf(page + len, "\n");
459 return len;
460}
461
462static int prof_cpu_mask_write_proc(struct file *file,
463 const char __user *buffer, unsigned long count, void *data)
464{
465 struct cpumask *mask = data;
466 unsigned long full_count = count, err;
467 cpumask_var_t new_value;
468
469 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
470 return -ENOMEM;
471
472 err = cpumask_parse_user(buffer, count, new_value);
473 if (!err) {
474 cpumask_copy(mask, new_value);
475 err = full_count;
476 }
477 free_cpumask_var(new_value);
478 return err;
479}
480
481void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
482{
483 struct proc_dir_entry *entry;
484
485
486 entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
487 if (!entry)
488 return;
489 entry->data = prof_cpu_mask;
490 entry->read_proc = prof_cpu_mask_read_proc;
491 entry->write_proc = prof_cpu_mask_write_proc;
492}
493
494
495
496
497
498
499
500static ssize_t
501read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
502{
503 unsigned long p = *ppos;
504 ssize_t read;
505 char *pnt;
506 unsigned int sample_step = 1 << prof_shift;
507
508 profile_flip_buffers();
509 if (p >= (prof_len+1)*sizeof(unsigned int))
510 return 0;
511 if (count > (prof_len+1)*sizeof(unsigned int) - p)
512 count = (prof_len+1)*sizeof(unsigned int) - p;
513 read = 0;
514
515 while (p < sizeof(unsigned int) && count > 0) {
516 if (put_user(*((char *)(&sample_step)+p), buf))
517 return -EFAULT;
518 buf++; p++; count--; read++;
519 }
520 pnt = (char *)prof_buffer + p - sizeof(atomic_t);
521 if (copy_to_user(buf, (void *)pnt, count))
522 return -EFAULT;
523 read += count;
524 *ppos += read;
525 return read;
526}
527
528
529
530
531
532
533
534static ssize_t write_profile(struct file *file, const char __user *buf,
535 size_t count, loff_t *ppos)
536{
537#ifdef CONFIG_SMP
538 extern int setup_profiling_timer(unsigned int multiplier);
539
540 if (count == sizeof(int)) {
541 unsigned int multiplier;
542
543 if (copy_from_user(&multiplier, buf, sizeof(int)))
544 return -EFAULT;
545
546 if (setup_profiling_timer(multiplier))
547 return -EINVAL;
548 }
549#endif
550 profile_discard_flip_buffers();
551 memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
552 return count;
553}
554
/* File operations for the "profile" proc file created by create_proc_profile(). */
static const struct file_operations proc_profile_operations = {
 .read = read_profile,
 .write = write_profile,
};
559
560#ifdef CONFIG_SMP
/*
 * Empty callback run on every CPU via on_each_cpu() from the
 * create_hash_tables() failure path; @unused is ignored.
 */
static void profile_nop(void *unused)
{
}
564
565static int create_hash_tables(void)
566{
567 int cpu;
568
569 for_each_online_cpu(cpu) {
570 int node = cpu_to_node(cpu);
571 struct page *page;
572
573 page = alloc_pages_node(node,
574 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
575 0);
576 if (!page)
577 goto out_cleanup;
578 per_cpu(cpu_profile_hits, cpu)[1]
579 = (struct profile_hit *)page_address(page);
580 page = alloc_pages_node(node,
581 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
582 0);
583 if (!page)
584 goto out_cleanup;
585 per_cpu(cpu_profile_hits, cpu)[0]
586 = (struct profile_hit *)page_address(page);
587 }
588 return 0;
589out_cleanup:
590 prof_on = 0;
591 smp_mb();
592 on_each_cpu(profile_nop, NULL, 1);
593 for_each_online_cpu(cpu) {
594 struct page *page;
595
596 if (per_cpu(cpu_profile_hits, cpu)[0]) {
597 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
598 per_cpu(cpu_profile_hits, cpu)[0] = NULL;
599 __free_page(page);
600 }
601 if (per_cpu(cpu_profile_hits, cpu)[1]) {
602 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
603 per_cpu(cpu_profile_hits, cpu)[1] = NULL;
604 __free_page(page);
605 }
606 }
607 return -1;
608}
609#else
/* !CONFIG_SMP: there are no per-CPU hit tables to create; always yields 0. */
#define create_hash_tables() ({ 0; })
611#endif
612
613int __ref create_proc_profile(void)
614{
615 struct proc_dir_entry *entry;
616
617 if (!prof_on)
618 return 0;
619 if (create_hash_tables())
620 return -ENOMEM;
621 entry = proc_create("profile", S_IWUSR | S_IRUGO,
622 NULL, &proc_profile_operations);
623 if (!entry)
624 return 0;
625 entry->size = (1+prof_len) * sizeof(atomic_t);
626 hotcpu_notifier(profile_cpu_callback, 0);
627 return 0;
628}
629module_init(create_proc_profile);
630#endif
631