1
2
3
4
5
6
7
8
9
10
11
12#include <linux/fs.h>
13#include <linux/mm.h>
14#include <linux/cpu.h>
15#include <linux/smp.h>
16#include <linux/idr.h>
17#include <linux/file.h>
18#include <linux/poll.h>
19#include <linux/slab.h>
20#include <linux/hash.h>
21#include <linux/sysfs.h>
22#include <linux/dcache.h>
23#include <linux/percpu.h>
24#include <linux/ptrace.h>
25#include <linux/reboot.h>
26#include <linux/vmstat.h>
27#include <linux/device.h>
28#include <linux/export.h>
29#include <linux/vmalloc.h>
30#include <linux/hardirq.h>
31#include <linux/rculist.h>
32#include <linux/uaccess.h>
33#include <linux/syscalls.h>
34#include <linux/anon_inodes.h>
35#include <linux/kernel_stat.h>
36#include <linux/perf_event.h>
37#include <linux/ftrace_event.h>
38#include <linux/hw_breakpoint.h>
39
40#include "internal.h"
41
42#include <asm/irq_regs.h>
43
/*
 * Argument bundle passed to remote_function() when a callback has to be
 * executed on another CPU.
 */
struct remote_function_call {
	struct task_struct	*p;			/* target task, NULL for a plain CPU call */
	int			(*func)(void *info);	/* callback to execute */
	void			*info;			/* opaque argument handed to @func */
	int			ret;			/* result reported back to the caller */
};
50
51static void remote_function(void *data)
52{
53 struct remote_function_call *tfc = data;
54 struct task_struct *p = tfc->p;
55
56 if (p) {
57 tfc->ret = -EAGAIN;
58 if (task_cpu(p) != smp_processor_id() || !task_curr(p))
59 return;
60 }
61
62 tfc->ret = tfc->func(tfc->info);
63}
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78static int
79task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
80{
81 struct remote_function_call data = {
82 .p = p,
83 .func = func,
84 .info = info,
85 .ret = -ESRCH,
86 };
87
88 if (task_curr(p))
89 smp_call_function_single(task_cpu(p), remote_function, &data, 1);
90
91 return data.ret;
92}
93
94
95
96
97
98
99
100
101
102
103static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
104{
105 struct remote_function_call data = {
106 .p = NULL,
107 .func = func,
108 .info = info,
109 .ret = -ENXIO,
110 };
111
112 smp_call_function_single(cpu, remote_function, &data, 1);
113
114 return data.ret;
115}
116
/* Union of the three PERF_FLAG_* bits named below. */
#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
		       PERF_FLAG_FD_OUTPUT  |\
		       PERF_FLAG_PID_CGROUP)

/*
 * Kernel and hypervisor branch-sampling level bits.
 * NOTE(review): the "PERM" in the name suggests these are the levels that
 * need a permission check -- confirm against the users of this mask.
 */
#define PERF_SAMPLE_BRANCH_PERM_PLM \
	(PERF_SAMPLE_BRANCH_KERNEL |\
	 PERF_SAMPLE_BRANCH_HV)
127
/*
 * Selector for which group list(s) of a context an operation applies to;
 * see ctx_group_list() for the pinned/flexible split.
 */
enum event_type_t {
	EVENT_FLEXIBLE	= 0x1,
	EVENT_PINNED	= 0x2,
	EVENT_ALL	= EVENT_FLEXIBLE | EVENT_PINNED,	/* both lists */
};
133
134
135
136
137
138struct static_key_deferred perf_sched_events __read_mostly;
139static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
140static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
141
142static atomic_t nr_mmap_events __read_mostly;
143static atomic_t nr_comm_events __read_mostly;
144static atomic_t nr_task_events __read_mostly;
145
146static LIST_HEAD(pmus);
147static DEFINE_MUTEX(pmus_lock);
148static struct srcu_struct pmus_srcu;
149
150
151
152
153
154
155
156
157int sysctl_perf_event_paranoid __read_mostly = 1;
158
159
160int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
161
162
163
164
165#define DEFAULT_MAX_SAMPLE_RATE 100000
166int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
167static int max_samples_per_tick __read_mostly =
168 DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
169
170int perf_proc_update_handler(struct ctl_table *table, int write,
171 void __user *buffer, size_t *lenp,
172 loff_t *ppos)
173{
174 int ret = proc_dointvec(table, write, buffer, lenp, ppos);
175
176 if (ret || !write)
177 return ret;
178
179 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
180
181 return 0;
182}
183
184static atomic64_t perf_event_id;
185
186static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
187 enum event_type_t event_type);
188
189static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
190 enum event_type_t event_type,
191 struct task_struct *task);
192
193static void update_context_time(struct perf_event_context *ctx);
194static u64 perf_event_time(struct perf_event *event);
195
196static void ring_buffer_attach(struct perf_event *event,
197 struct ring_buffer *rb);
198
199void __weak perf_event_print_debug(void) { }
200
201extern __weak const char *perf_pmu_name(void)
202{
203 return "pmu";
204}
205
206static inline u64 perf_clock(void)
207{
208 return local_clock();
209}
210
211static inline struct perf_cpu_context *
212__get_cpu_context(struct perf_event_context *ctx)
213{
214 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
215}
216
217static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
218 struct perf_event_context *ctx)
219{
220 raw_spin_lock(&cpuctx->ctx.lock);
221 if (ctx)
222 raw_spin_lock(&ctx->lock);
223}
224
225static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
226 struct perf_event_context *ctx)
227{
228 if (ctx)
229 raw_spin_unlock(&ctx->lock);
230 raw_spin_unlock(&cpuctx->ctx.lock);
231}
232
233#ifdef CONFIG_CGROUP_PERF
234
235
236
237
238
239
240static inline struct perf_cgroup *
241perf_cgroup_from_task(struct task_struct *task)
242{
243 return container_of(task_subsys_state(task, perf_subsys_id),
244 struct perf_cgroup, css);
245}
246
247static inline bool
248perf_cgroup_match(struct perf_event *event)
249{
250 struct perf_event_context *ctx = event->ctx;
251 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
252
253 return !event->cgrp || event->cgrp == cpuctx->cgrp;
254}
255
256static inline bool perf_tryget_cgroup(struct perf_event *event)
257{
258 return css_tryget(&event->cgrp->css);
259}
260
261static inline void perf_put_cgroup(struct perf_event *event)
262{
263 css_put(&event->cgrp->css);
264}
265
266static inline void perf_detach_cgroup(struct perf_event *event)
267{
268 perf_put_cgroup(event);
269 event->cgrp = NULL;
270}
271
272static inline int is_cgroup_event(struct perf_event *event)
273{
274 return event->cgrp != NULL;
275}
276
277static inline u64 perf_cgroup_event_time(struct perf_event *event)
278{
279 struct perf_cgroup_info *t;
280
281 t = per_cpu_ptr(event->cgrp->info, event->cpu);
282 return t->time;
283}
284
285static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
286{
287 struct perf_cgroup_info *info;
288 u64 now;
289
290 now = perf_clock();
291
292 info = this_cpu_ptr(cgrp->info);
293
294 info->time += now - info->timestamp;
295 info->timestamp = now;
296}
297
298static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
299{
300 struct perf_cgroup *cgrp_out = cpuctx->cgrp;
301 if (cgrp_out)
302 __update_cgrp_time(cgrp_out);
303}
304
305static inline void update_cgrp_time_from_event(struct perf_event *event)
306{
307 struct perf_cgroup *cgrp;
308
309
310
311
312
313 if (!is_cgroup_event(event))
314 return;
315
316 cgrp = perf_cgroup_from_task(current);
317
318
319
320 if (cgrp == event->cgrp)
321 __update_cgrp_time(event->cgrp);
322}
323
324static inline void
325perf_cgroup_set_timestamp(struct task_struct *task,
326 struct perf_event_context *ctx)
327{
328 struct perf_cgroup *cgrp;
329 struct perf_cgroup_info *info;
330
331
332
333
334
335
336 if (!task || !ctx->nr_cgroups)
337 return;
338
339 cgrp = perf_cgroup_from_task(task);
340 info = this_cpu_ptr(cgrp->info);
341 info->timestamp = ctx->timestamp;
342}
343
/* Mode bits for perf_cgroup_switch(); they may be tested independently. */
#define PERF_CGROUP_SWOUT	0x1	/* schedule cgroup events out */
#define PERF_CGROUP_SWIN	0x2	/* schedule cgroup events in */
346
347
348
349
350
351
352
353void perf_cgroup_switch(struct task_struct *task, int mode)
354{
355 struct perf_cpu_context *cpuctx;
356 struct pmu *pmu;
357 unsigned long flags;
358
359
360
361
362
363
364 local_irq_save(flags);
365
366
367
368
369
370 rcu_read_lock();
371
372 list_for_each_entry_rcu(pmu, &pmus, entry) {
373 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
374
375
376
377
378
379
380
381
382 if (cpuctx->ctx.nr_cgroups > 0) {
383 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
384 perf_pmu_disable(cpuctx->ctx.pmu);
385
386 if (mode & PERF_CGROUP_SWOUT) {
387 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
388
389
390
391
392 cpuctx->cgrp = NULL;
393 }
394
395 if (mode & PERF_CGROUP_SWIN) {
396 WARN_ON_ONCE(cpuctx->cgrp);
397
398
399
400
401 cpuctx->cgrp = perf_cgroup_from_task(task);
402 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
403 }
404 perf_pmu_enable(cpuctx->ctx.pmu);
405 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
406 }
407 }
408
409 rcu_read_unlock();
410
411 local_irq_restore(flags);
412}
413
414static inline void perf_cgroup_sched_out(struct task_struct *task,
415 struct task_struct *next)
416{
417 struct perf_cgroup *cgrp1;
418 struct perf_cgroup *cgrp2 = NULL;
419
420
421
422
423 cgrp1 = perf_cgroup_from_task(task);
424
425
426
427
428
429 if (next)
430 cgrp2 = perf_cgroup_from_task(next);
431
432
433
434
435
436
437 if (cgrp1 != cgrp2)
438 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
439}
440
441static inline void perf_cgroup_sched_in(struct task_struct *prev,
442 struct task_struct *task)
443{
444 struct perf_cgroup *cgrp1;
445 struct perf_cgroup *cgrp2 = NULL;
446
447
448
449
450 cgrp1 = perf_cgroup_from_task(task);
451
452
453 cgrp2 = perf_cgroup_from_task(prev);
454
455
456
457
458
459
460 if (cgrp1 != cgrp2)
461 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
462}
463
464static inline int perf_cgroup_connect(int fd, struct perf_event *event,
465 struct perf_event_attr *attr,
466 struct perf_event *group_leader)
467{
468 struct perf_cgroup *cgrp;
469 struct cgroup_subsys_state *css;
470 struct file *file;
471 int ret = 0, fput_needed;
472
473 file = fget_light(fd, &fput_needed);
474 if (!file)
475 return -EBADF;
476
477 css = cgroup_css_from_dir(file, perf_subsys_id);
478 if (IS_ERR(css)) {
479 ret = PTR_ERR(css);
480 goto out;
481 }
482
483 cgrp = container_of(css, struct perf_cgroup, css);
484 event->cgrp = cgrp;
485
486
487 if (!perf_tryget_cgroup(event)) {
488 event->cgrp = NULL;
489 ret = -ENOENT;
490 goto out;
491 }
492
493
494
495
496
497
498 if (group_leader && group_leader->cgrp != cgrp) {
499 perf_detach_cgroup(event);
500 ret = -EINVAL;
501 }
502out:
503 fput_light(file, fput_needed);
504 return ret;
505}
506
507static inline void
508perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
509{
510 struct perf_cgroup_info *t;
511 t = per_cpu_ptr(event->cgrp->info, event->cpu);
512 event->shadow_ctx_time = now - t->timestamp;
513}
514
515static inline void
516perf_cgroup_defer_enabled(struct perf_event *event)
517{
518
519
520
521
522
523
524 if (is_cgroup_event(event) && !perf_cgroup_match(event))
525 event->cgrp_defer_enabled = 1;
526}
527
528static inline void
529perf_cgroup_mark_enabled(struct perf_event *event,
530 struct perf_event_context *ctx)
531{
532 struct perf_event *sub;
533 u64 tstamp = perf_event_time(event);
534
535 if (!event->cgrp_defer_enabled)
536 return;
537
538 event->cgrp_defer_enabled = 0;
539
540 event->tstamp_enabled = tstamp - event->total_time_enabled;
541 list_for_each_entry(sub, &event->sibling_list, group_entry) {
542 if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
543 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
544 sub->cgrp_defer_enabled = 0;
545 }
546 }
547}
548#else
549
/* !CONFIG_CGROUP_PERF: there are no cgroups to filter on, everything matches. */
static inline bool
perf_cgroup_match(struct perf_event *event)
{
	return true;
}
555
/* !CONFIG_CGROUP_PERF: no cgroup reference to drop. */
static inline void perf_detach_cgroup(struct perf_event *event)
{
}
558
/* !CONFIG_CGROUP_PERF: no event is ever a cgroup event. */
static inline int is_cgroup_event(struct perf_event *event)
{
	return 0;
}
563
564static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
565{
566 return 0;
567}
568
/* !CONFIG_CGROUP_PERF: no cgroup clock to update. */
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
}
572
/* !CONFIG_CGROUP_PERF: no cgroup clock to update. */
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
}
576
/* !CONFIG_CGROUP_PERF: nothing to switch out. */
static inline void perf_cgroup_sched_out(struct task_struct *task,
					 struct task_struct *next)
{
}
581
/* !CONFIG_CGROUP_PERF: nothing to switch in. */
static inline void perf_cgroup_sched_in(struct task_struct *prev,
					struct task_struct *task)
{
}
586
/* !CONFIG_CGROUP_PERF: binding an event to a cgroup always fails with -EINVAL. */
static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
				      struct perf_event_attr *attr,
				      struct perf_event *group_leader)
{
	return -EINVAL;
}
593
/* !CONFIG_CGROUP_PERF: no cgroup timestamp to set. */
static inline void
perf_cgroup_set_timestamp(struct task_struct *task,
			  struct perf_event_context *ctx)
{
}
599
/*
 * !CONFIG_CGROUP_PERF: empty placeholder.
 * NOTE(review): the second parameter is a task pointer here but an int mode
 * in the CONFIG_CGROUP_PERF variant, and this stub is not static -- confirm
 * nothing outside this file relies on either before tidying it up.
 */
void
perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}
604
605static inline void
606perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
607{
608}
609
610static inline u64 perf_cgroup_event_time(struct perf_event *event)
611{
612 return 0;
613}
614
/* !CONFIG_CGROUP_PERF: nothing to defer. */
static inline void
perf_cgroup_defer_enabled(struct perf_event *event)
{
}
619
/* !CONFIG_CGROUP_PERF: no deferred enable to complete. */
static inline void
perf_cgroup_mark_enabled(struct perf_event *event,
			 struct perf_event_context *ctx)
{
}
625#endif
626
627void perf_pmu_disable(struct pmu *pmu)
628{
629 int *count = this_cpu_ptr(pmu->pmu_disable_count);
630 if (!(*count)++)
631 pmu->pmu_disable(pmu);
632}
633
634void perf_pmu_enable(struct pmu *pmu)
635{
636 int *count = this_cpu_ptr(pmu->pmu_disable_count);
637 if (!--(*count))
638 pmu->pmu_enable(pmu);
639}
640
641static DEFINE_PER_CPU(struct list_head, rotation_list);
642
643
644
645
646
647
648static void perf_pmu_rotate_start(struct pmu *pmu)
649{
650 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
651 struct list_head *head = &__get_cpu_var(rotation_list);
652
653 WARN_ON(!irqs_disabled());
654
655 if (list_empty(&cpuctx->rotation_list))
656 list_add(&cpuctx->rotation_list, head);
657}
658
659static void get_ctx(struct perf_event_context *ctx)
660{
661 WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
662}
663
664static void put_ctx(struct perf_event_context *ctx)
665{
666 if (atomic_dec_and_test(&ctx->refcount)) {
667 if (ctx->parent_ctx)
668 put_ctx(ctx->parent_ctx);
669 if (ctx->task)
670 put_task_struct(ctx->task);
671 kfree_rcu(ctx, rcu_head);
672 }
673}
674
675static void unclone_ctx(struct perf_event_context *ctx)
676{
677 if (ctx->parent_ctx) {
678 put_ctx(ctx->parent_ctx);
679 ctx->parent_ctx = NULL;
680 }
681}
682
683static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
684{
685
686
687
688 if (event->parent)
689 event = event->parent;
690
691 return task_tgid_nr_ns(p, event->ns);
692}
693
694static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
695{
696
697
698
699 if (event->parent)
700 event = event->parent;
701
702 return task_pid_nr_ns(p, event->ns);
703}
704
705
706
707
708
709static u64 primary_event_id(struct perf_event *event)
710{
711 u64 id = event->id;
712
713 if (event->parent)
714 id = event->parent->id;
715
716 return id;
717}
718
719
720
721
722
723
724static struct perf_event_context *
725perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
726{
727 struct perf_event_context *ctx;
728
729 rcu_read_lock();
730retry:
731 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
732 if (ctx) {
733
734
735
736
737
738
739
740
741
742
743 raw_spin_lock_irqsave(&ctx->lock, *flags);
744 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
745 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
746 goto retry;
747 }
748
749 if (!atomic_inc_not_zero(&ctx->refcount)) {
750 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
751 ctx = NULL;
752 }
753 }
754 rcu_read_unlock();
755 return ctx;
756}
757
758
759
760
761
762
763static struct perf_event_context *
764perf_pin_task_context(struct task_struct *task, int ctxn)
765{
766 struct perf_event_context *ctx;
767 unsigned long flags;
768
769 ctx = perf_lock_task_context(task, ctxn, &flags);
770 if (ctx) {
771 ++ctx->pin_count;
772 raw_spin_unlock_irqrestore(&ctx->lock, flags);
773 }
774 return ctx;
775}
776
777static void perf_unpin_context(struct perf_event_context *ctx)
778{
779 unsigned long flags;
780
781 raw_spin_lock_irqsave(&ctx->lock, flags);
782 --ctx->pin_count;
783 raw_spin_unlock_irqrestore(&ctx->lock, flags);
784}
785
786
787
788
789static void update_context_time(struct perf_event_context *ctx)
790{
791 u64 now = perf_clock();
792
793 ctx->time += now - ctx->timestamp;
794 ctx->timestamp = now;
795}
796
797static u64 perf_event_time(struct perf_event *event)
798{
799 struct perf_event_context *ctx = event->ctx;
800
801 if (is_cgroup_event(event))
802 return perf_cgroup_event_time(event);
803
804 return ctx ? ctx->time : 0;
805}
806
807
808
809
810
811static void update_event_times(struct perf_event *event)
812{
813 struct perf_event_context *ctx = event->ctx;
814 u64 run_end;
815
816 if (event->state < PERF_EVENT_STATE_INACTIVE ||
817 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
818 return;
819
820
821
822
823
824
825
826
827
828
829 if (is_cgroup_event(event))
830 run_end = perf_cgroup_event_time(event);
831 else if (ctx->is_active)
832 run_end = ctx->time;
833 else
834 run_end = event->tstamp_stopped;
835
836 event->total_time_enabled = run_end - event->tstamp_enabled;
837
838 if (event->state == PERF_EVENT_STATE_INACTIVE)
839 run_end = event->tstamp_stopped;
840 else
841 run_end = perf_event_time(event);
842
843 event->total_time_running = run_end - event->tstamp_running;
844
845}
846
847
848
849
850static void update_group_times(struct perf_event *leader)
851{
852 struct perf_event *event;
853
854 update_event_times(leader);
855 list_for_each_entry(event, &leader->sibling_list, group_entry)
856 update_event_times(event);
857}
858
859static struct list_head *
860ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
861{
862 if (event->attr.pinned)
863 return &ctx->pinned_groups;
864 else
865 return &ctx->flexible_groups;
866}
867
868
869
870
871
872static void
873list_add_event(struct perf_event *event, struct perf_event_context *ctx)
874{
875 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
876 event->attach_state |= PERF_ATTACH_CONTEXT;
877
878
879
880
881
882
883 if (event->group_leader == event) {
884 struct list_head *list;
885
886 if (is_software_event(event))
887 event->group_flags |= PERF_GROUP_SOFTWARE;
888
889 list = ctx_group_list(event, ctx);
890 list_add_tail(&event->group_entry, list);
891 }
892
893 if (is_cgroup_event(event))
894 ctx->nr_cgroups++;
895
896 if (has_branch_stack(event))
897 ctx->nr_branch_stack++;
898
899 list_add_rcu(&event->event_entry, &ctx->event_list);
900 if (!ctx->nr_events)
901 perf_pmu_rotate_start(ctx->pmu);
902 ctx->nr_events++;
903 if (event->attr.inherit_stat)
904 ctx->nr_stat++;
905}
906
907
908
909
910
911static void perf_event__read_size(struct perf_event *event)
912{
913 int entry = sizeof(u64);
914 int size = 0;
915 int nr = 1;
916
917 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
918 size += sizeof(u64);
919
920 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
921 size += sizeof(u64);
922
923 if (event->attr.read_format & PERF_FORMAT_ID)
924 entry += sizeof(u64);
925
926 if (event->attr.read_format & PERF_FORMAT_GROUP) {
927 nr += event->group_leader->nr_siblings;
928 size += sizeof(u64);
929 }
930
931 size += entry * nr;
932 event->read_size = size;
933}
934
935static void perf_event__header_size(struct perf_event *event)
936{
937 struct perf_sample_data *data;
938 u64 sample_type = event->attr.sample_type;
939 u16 size = 0;
940
941 perf_event__read_size(event);
942
943 if (sample_type & PERF_SAMPLE_IP)
944 size += sizeof(data->ip);
945
946 if (sample_type & PERF_SAMPLE_ADDR)
947 size += sizeof(data->addr);
948
949 if (sample_type & PERF_SAMPLE_PERIOD)
950 size += sizeof(data->period);
951
952 if (sample_type & PERF_SAMPLE_READ)
953 size += event->read_size;
954
955 event->header_size = size;
956}
957
958static void perf_event__id_header_size(struct perf_event *event)
959{
960 struct perf_sample_data *data;
961 u64 sample_type = event->attr.sample_type;
962 u16 size = 0;
963
964 if (sample_type & PERF_SAMPLE_TID)
965 size += sizeof(data->tid_entry);
966
967 if (sample_type & PERF_SAMPLE_TIME)
968 size += sizeof(data->time);
969
970 if (sample_type & PERF_SAMPLE_ID)
971 size += sizeof(data->id);
972
973 if (sample_type & PERF_SAMPLE_STREAM_ID)
974 size += sizeof(data->stream_id);
975
976 if (sample_type & PERF_SAMPLE_CPU)
977 size += sizeof(data->cpu_entry);
978
979 event->id_header_size = size;
980}
981
982static void perf_group_attach(struct perf_event *event)
983{
984 struct perf_event *group_leader = event->group_leader, *pos;
985
986
987
988
989 if (event->attach_state & PERF_ATTACH_GROUP)
990 return;
991
992 event->attach_state |= PERF_ATTACH_GROUP;
993
994 if (group_leader == event)
995 return;
996
997 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
998 !is_software_event(event))
999 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
1000
1001 list_add_tail(&event->group_entry, &group_leader->sibling_list);
1002 group_leader->nr_siblings++;
1003
1004 perf_event__header_size(group_leader);
1005
1006 list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
1007 perf_event__header_size(pos);
1008}
1009
1010
1011
1012
1013
1014static void
1015list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1016{
1017 struct perf_cpu_context *cpuctx;
1018
1019
1020
1021 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
1022 return;
1023
1024 event->attach_state &= ~PERF_ATTACH_CONTEXT;
1025
1026 if (is_cgroup_event(event)) {
1027 ctx->nr_cgroups--;
1028 cpuctx = __get_cpu_context(ctx);
1029
1030
1031
1032
1033
1034 if (!ctx->nr_cgroups)
1035 cpuctx->cgrp = NULL;
1036 }
1037
1038 if (has_branch_stack(event))
1039 ctx->nr_branch_stack--;
1040
1041 ctx->nr_events--;
1042 if (event->attr.inherit_stat)
1043 ctx->nr_stat--;
1044
1045 list_del_rcu(&event->event_entry);
1046
1047 if (event->group_leader == event)
1048 list_del_init(&event->group_entry);
1049
1050 update_group_times(event);
1051
1052
1053
1054
1055
1056
1057
1058
1059 if (event->state > PERF_EVENT_STATE_OFF)
1060 event->state = PERF_EVENT_STATE_OFF;
1061}
1062
1063static void perf_group_detach(struct perf_event *event)
1064{
1065 struct perf_event *sibling, *tmp;
1066 struct list_head *list = NULL;
1067
1068
1069
1070
1071 if (!(event->attach_state & PERF_ATTACH_GROUP))
1072 return;
1073
1074 event->attach_state &= ~PERF_ATTACH_GROUP;
1075
1076
1077
1078
1079 if (event->group_leader != event) {
1080 list_del_init(&event->group_entry);
1081 event->group_leader->nr_siblings--;
1082 goto out;
1083 }
1084
1085 if (!list_empty(&event->group_entry))
1086 list = &event->group_entry;
1087
1088
1089
1090
1091
1092
1093 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
1094 if (list)
1095 list_move_tail(&sibling->group_entry, list);
1096 sibling->group_leader = sibling;
1097
1098
1099 sibling->group_flags = event->group_flags;
1100 }
1101
1102out:
1103 perf_event__header_size(event->group_leader);
1104
1105 list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
1106 perf_event__header_size(tmp);
1107}
1108
1109static inline int
1110event_filter_match(struct perf_event *event)
1111{
1112 return (event->cpu == -1 || event->cpu == smp_processor_id())
1113 && perf_cgroup_match(event);
1114}
1115
1116static void
1117event_sched_out(struct perf_event *event,
1118 struct perf_cpu_context *cpuctx,
1119 struct perf_event_context *ctx)
1120{
1121 u64 tstamp = perf_event_time(event);
1122 u64 delta;
1123
1124
1125
1126
1127
1128
1129 if (event->state == PERF_EVENT_STATE_INACTIVE
1130 && !event_filter_match(event)) {
1131 delta = tstamp - event->tstamp_stopped;
1132 event->tstamp_running += delta;
1133 event->tstamp_stopped = tstamp;
1134 }
1135
1136 if (event->state != PERF_EVENT_STATE_ACTIVE)
1137 return;
1138
1139 event->state = PERF_EVENT_STATE_INACTIVE;
1140 if (event->pending_disable) {
1141 event->pending_disable = 0;
1142 event->state = PERF_EVENT_STATE_OFF;
1143 }
1144 event->tstamp_stopped = tstamp;
1145 event->pmu->del(event, 0);
1146 event->oncpu = -1;
1147
1148 if (!is_software_event(event))
1149 cpuctx->active_oncpu--;
1150 ctx->nr_active--;
1151 if (event->attr.freq && event->attr.sample_freq)
1152 ctx->nr_freq--;
1153 if (event->attr.exclusive || !cpuctx->active_oncpu)
1154 cpuctx->exclusive = 0;
1155}
1156
1157static void
1158group_sched_out(struct perf_event *group_event,
1159 struct perf_cpu_context *cpuctx,
1160 struct perf_event_context *ctx)
1161{
1162 struct perf_event *event;
1163 int state = group_event->state;
1164
1165 event_sched_out(group_event, cpuctx, ctx);
1166
1167
1168
1169
1170 list_for_each_entry(event, &group_event->sibling_list, group_entry)
1171 event_sched_out(event, cpuctx, ctx);
1172
1173 if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
1174 cpuctx->exclusive = 0;
1175}
1176
1177
1178
1179
1180
1181
1182
1183static int __perf_remove_from_context(void *info)
1184{
1185 struct perf_event *event = info;
1186 struct perf_event_context *ctx = event->ctx;
1187 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1188
1189 raw_spin_lock(&ctx->lock);
1190 event_sched_out(event, cpuctx, ctx);
1191 list_del_event(event, ctx);
1192 if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
1193 ctx->is_active = 0;
1194 cpuctx->task_ctx = NULL;
1195 }
1196 raw_spin_unlock(&ctx->lock);
1197
1198 return 0;
1199}
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215static void perf_remove_from_context(struct perf_event *event)
1216{
1217 struct perf_event_context *ctx = event->ctx;
1218 struct task_struct *task = ctx->task;
1219
1220 lockdep_assert_held(&ctx->mutex);
1221
1222 if (!task) {
1223
1224
1225
1226
1227 cpu_function_call(event->cpu, __perf_remove_from_context, event);
1228 return;
1229 }
1230
1231retry:
1232 if (!task_function_call(task, __perf_remove_from_context, event))
1233 return;
1234
1235 raw_spin_lock_irq(&ctx->lock);
1236
1237
1238
1239
1240 if (ctx->is_active) {
1241 raw_spin_unlock_irq(&ctx->lock);
1242 goto retry;
1243 }
1244
1245
1246
1247
1248
1249 list_del_event(event, ctx);
1250 raw_spin_unlock_irq(&ctx->lock);
1251}
1252
1253
1254
1255
1256int __perf_event_disable(void *info)
1257{
1258 struct perf_event *event = info;
1259 struct perf_event_context *ctx = event->ctx;
1260 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1261
1262
1263
1264
1265
1266
1267
1268
1269 if (ctx->task && cpuctx->task_ctx != ctx)
1270 return -EINVAL;
1271
1272 raw_spin_lock(&ctx->lock);
1273
1274
1275
1276
1277
1278 if (event->state >= PERF_EVENT_STATE_INACTIVE) {
1279 update_context_time(ctx);
1280 update_cgrp_time_from_event(event);
1281 update_group_times(event);
1282 if (event == event->group_leader)
1283 group_sched_out(event, cpuctx, ctx);
1284 else
1285 event_sched_out(event, cpuctx, ctx);
1286 event->state = PERF_EVENT_STATE_OFF;
1287 }
1288
1289 raw_spin_unlock(&ctx->lock);
1290
1291 return 0;
1292}
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307void perf_event_disable(struct perf_event *event)
1308{
1309 struct perf_event_context *ctx = event->ctx;
1310 struct task_struct *task = ctx->task;
1311
1312 if (!task) {
1313
1314
1315
1316 cpu_function_call(event->cpu, __perf_event_disable, event);
1317 return;
1318 }
1319
1320retry:
1321 if (!task_function_call(task, __perf_event_disable, event))
1322 return;
1323
1324 raw_spin_lock_irq(&ctx->lock);
1325
1326
1327
1328 if (event->state == PERF_EVENT_STATE_ACTIVE) {
1329 raw_spin_unlock_irq(&ctx->lock);
1330
1331
1332
1333
1334 task = ctx->task;
1335 goto retry;
1336 }
1337
1338
1339
1340
1341
1342 if (event->state == PERF_EVENT_STATE_INACTIVE) {
1343 update_group_times(event);
1344 event->state = PERF_EVENT_STATE_OFF;
1345 }
1346 raw_spin_unlock_irq(&ctx->lock);
1347}
1348EXPORT_SYMBOL_GPL(perf_event_disable);
1349
1350static void perf_set_shadow_time(struct perf_event *event,
1351 struct perf_event_context *ctx,
1352 u64 tstamp)
1353{
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379 if (is_cgroup_event(event))
1380 perf_cgroup_set_shadow_time(event, tstamp);
1381 else
1382 event->shadow_ctx_time = tstamp - ctx->timestamp;
1383}
1384
/* Sentinel compared against hw.interrupts in event_sched_in(). */
#define MAX_INTERRUPTS (~0ULL)

/* Defined later in the file. */
static void perf_log_throttle(struct perf_event *event, int enable);
1388
1389static int
1390event_sched_in(struct perf_event *event,
1391 struct perf_cpu_context *cpuctx,
1392 struct perf_event_context *ctx)
1393{
1394 u64 tstamp = perf_event_time(event);
1395
1396 if (event->state <= PERF_EVENT_STATE_OFF)
1397 return 0;
1398
1399 event->state = PERF_EVENT_STATE_ACTIVE;
1400 event->oncpu = smp_processor_id();
1401
1402
1403
1404
1405
1406
1407 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
1408 perf_log_throttle(event, 1);
1409 event->hw.interrupts = 0;
1410 }
1411
1412
1413
1414
1415 smp_wmb();
1416
1417 if (event->pmu->add(event, PERF_EF_START)) {
1418 event->state = PERF_EVENT_STATE_INACTIVE;
1419 event->oncpu = -1;
1420 return -EAGAIN;
1421 }
1422
1423 event->tstamp_running += tstamp - event->tstamp_stopped;
1424
1425 perf_set_shadow_time(event, ctx, tstamp);
1426
1427 if (!is_software_event(event))
1428 cpuctx->active_oncpu++;
1429 ctx->nr_active++;
1430 if (event->attr.freq && event->attr.sample_freq)
1431 ctx->nr_freq++;
1432
1433 if (event->attr.exclusive)
1434 cpuctx->exclusive = 1;
1435
1436 return 0;
1437}
1438
1439static int
1440group_sched_in(struct perf_event *group_event,
1441 struct perf_cpu_context *cpuctx,
1442 struct perf_event_context *ctx)
1443{
1444 struct perf_event *event, *partial_group = NULL;
1445 struct pmu *pmu = group_event->pmu;
1446 u64 now = ctx->time;
1447 bool simulate = false;
1448
1449 if (group_event->state == PERF_EVENT_STATE_OFF)
1450 return 0;
1451
1452 pmu->start_txn(pmu);
1453
1454 if (event_sched_in(group_event, cpuctx, ctx)) {
1455 pmu->cancel_txn(pmu);
1456 return -EAGAIN;
1457 }
1458
1459
1460
1461
1462 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
1463 if (event_sched_in(event, cpuctx, ctx)) {
1464 partial_group = event;
1465 goto group_error;
1466 }
1467 }
1468
1469 if (!pmu->commit_txn(pmu))
1470 return 0;
1471
1472group_error:
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
1488 if (event == partial_group)
1489 simulate = true;
1490
1491 if (simulate) {
1492 event->tstamp_running += now - event->tstamp_stopped;
1493 event->tstamp_stopped = now;
1494 } else {
1495 event_sched_out(event, cpuctx, ctx);
1496 }
1497 }
1498 event_sched_out(group_event, cpuctx, ctx);
1499
1500 pmu->cancel_txn(pmu);
1501
1502 return -EAGAIN;
1503}
1504
1505
1506
1507
1508static int group_can_go_on(struct perf_event *event,
1509 struct perf_cpu_context *cpuctx,
1510 int can_add_hw)
1511{
1512
1513
1514
1515 if (event->group_flags & PERF_GROUP_SOFTWARE)
1516 return 1;
1517
1518
1519
1520
1521 if (cpuctx->exclusive)
1522 return 0;
1523
1524
1525
1526
1527 if (event->attr.exclusive && cpuctx->active_oncpu)
1528 return 0;
1529
1530
1531
1532
1533 return can_add_hw;
1534}
1535
1536static void add_event_to_ctx(struct perf_event *event,
1537 struct perf_event_context *ctx)
1538{
1539 u64 tstamp = perf_event_time(event);
1540
1541 list_add_event(event, ctx);
1542 perf_group_attach(event);
1543 event->tstamp_enabled = tstamp;
1544 event->tstamp_running = tstamp;
1545 event->tstamp_stopped = tstamp;
1546}
1547
/* Forward declarations; both are defined later in the file. */
static void task_ctx_sched_out(struct perf_event_context *ctx);

static void ctx_sched_in(struct perf_event_context *ctx,
			 struct perf_cpu_context *cpuctx,
			 enum event_type_t event_type,
			 struct task_struct *task);
1554
1555static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
1556 struct perf_event_context *ctx,
1557 struct task_struct *task)
1558{
1559 cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
1560 if (ctx)
1561 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
1562 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
1563 if (ctx)
1564 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
1565}
1566
1567
1568
1569
1570
1571
/*
 * Cross-call callback that installs the event passed in @info into its
 * context on the CPU this runs on.  It is handed to cpu_function_call() /
 * task_function_call() by perf_install_in_context().
 *
 * Everything currently scheduled on this CPU is taken off, the event is
 * added to its context, and the whole set is scheduled back in.
 *
 * Always returns 0.
 */
static int __perf_install_in_context(void *info)
{
	struct perf_event *event = info;
	struct perf_event_context *ctx = event->ctx;
	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
	struct perf_event_context *task_ctx = cpuctx->task_ctx;
	struct task_struct *task = current;

	perf_ctx_lock(cpuctx, task_ctx);
	perf_pmu_disable(cpuctx->ctx.pmu);

	/* If a task context is installed on this CPU, schedule it out first. */
	if (task_ctx)
		task_ctx_sched_out(task_ctx);

	/*
	 * The event belongs to a task context other than the one that was
	 * installed: drop the old task context's lock (if any), take the
	 * event's context lock instead and treat it as the task context from
	 * here on.
	 */
	if (ctx->task && task_ctx != ctx) {
		if (task_ctx)
			raw_spin_unlock(&task_ctx->lock);
		raw_spin_lock(&ctx->lock);
		task_ctx = ctx;
	}

	/* (Re)install the task context and use its task for the sched-in. */
	if (task_ctx) {
		cpuctx->task_ctx = task_ctx;
		task = task_ctx->task;
	}

	/* Take all CPU-context events off as well. */
	cpu_ctx_sched_out(cpuctx, EVENT_ALL);

	update_context_time(ctx);

	/*
	 * Refresh cgroup time before add_event_to_ctx() samples the event time
	 * for the initial timestamps.
	 * NOTE(review): presumably only matters for cgroup events -- confirm.
	 */
	update_cgrp_time_from_event(event);

	add_event_to_ctx(event, ctx);

	/* Schedule everything back in, now including the new event. */
	perf_event_sched_in(cpuctx, task_ctx, task);

	perf_pmu_enable(cpuctx->ctx.pmu);
	perf_ctx_unlock(cpuctx, task_ctx);

	return 0;
}
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638static void
1639perf_install_in_context(struct perf_event_context *ctx,
1640 struct perf_event *event,
1641 int cpu)
1642{
1643 struct task_struct *task = ctx->task;
1644
1645 lockdep_assert_held(&ctx->mutex);
1646
1647 event->ctx = ctx;
1648 if (event->cpu != -1)
1649 event->cpu = cpu;
1650
1651 if (!task) {
1652
1653
1654
1655
1656 cpu_function_call(cpu, __perf_install_in_context, event);
1657 return;
1658 }
1659
1660retry:
1661 if (!task_function_call(task, __perf_install_in_context, event))
1662 return;
1663
1664 raw_spin_lock_irq(&ctx->lock);
1665
1666
1667
1668
1669 if (ctx->is_active) {
1670 raw_spin_unlock_irq(&ctx->lock);
1671 goto retry;
1672 }
1673
1674
1675
1676
1677
1678 add_event_to_ctx(event, ctx);
1679 raw_spin_unlock_irq(&ctx->lock);
1680}
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690static void __perf_event_mark_enabled(struct perf_event *event)
1691{
1692 struct perf_event *sub;
1693 u64 tstamp = perf_event_time(event);
1694
1695 event->state = PERF_EVENT_STATE_INACTIVE;
1696 event->tstamp_enabled = tstamp - event->total_time_enabled;
1697 list_for_each_entry(sub, &event->sibling_list, group_entry) {
1698 if (sub->state >= PERF_EVENT_STATE_INACTIVE)
1699 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
1700 }
1701}
1702
1703
1704
1705
/*
 * Cross-call callback that enables the event passed in @info on the CPU it
 * runs on.  It is handed to cpu_function_call() / task_function_call() by
 * perf_event_enable().
 *
 * Returns -EINVAL (after a one-time warning) when the event's context is
 * not active, and 0 otherwise -- including when scheduling the event in
 * failed.
 */
static int __perf_event_enable(void *info)
{
	struct perf_event *event = info;
	struct perf_event_context *ctx = event->ctx;
	struct perf_event *leader = event->group_leader;
	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
	int err;

	if (WARN_ON_ONCE(!ctx->is_active))
		return -EINVAL;

	raw_spin_lock(&ctx->lock);
	update_context_time(ctx);

	/* Already enabled (INACTIVE or above): nothing to do. */
	if (event->state >= PERF_EVENT_STATE_INACTIVE)
		goto unlock;

	/* Set the cgroup timestamp before the event is marked enabled. */
	perf_cgroup_set_timestamp(current, ctx);

	__perf_event_mark_enabled(event);

	if (!event_filter_match(event)) {
		/*
		 * The event does not match the filter here, so it is not
		 * scheduled in; cgroup events get their enable deferred.
		 */
		if (is_cgroup_event(event))
			perf_cgroup_defer_enabled(event);
		goto unlock;
	}

	/*
	 * A sibling is only scheduled in when its group leader is already
	 * active; otherwise it stays merely enabled.
	 */
	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
		goto unlock;

	if (!group_can_go_on(event, cpuctx, 1)) {
		err = -EEXIST;
	} else {
		/* A leader brings in its whole group, a sibling just itself. */
		if (event == leader)
			err = group_sched_in(event, cpuctx, ctx);
		else
			err = event_sched_in(event, cpuctx, ctx);
	}

	if (err) {
		/*
		 * Scheduling failed.  If the event is a sibling, take the whole
		 * group off again; a pinned leader is put into the ERROR state.
		 */
		if (leader != event)
			group_sched_out(leader, cpuctx, ctx);
		if (leader->attr.pinned) {
			update_group_times(leader);
			leader->state = PERF_EVENT_STATE_ERROR;
		}
	}

unlock:
	raw_spin_unlock(&ctx->lock);

	return 0;
}
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
/*
 * Enable @event.
 *
 * For a CPU context the enable callback is sent to event->cpu.  For a task
 * context: if the context is not active the event is simply marked enabled
 * under ctx->lock; otherwise the callback is sent to the CPU the task is
 * running on, retrying while the context is active and the event is still
 * in the OFF state.
 */
void perf_event_enable(struct perf_event *event)
{
	struct perf_event_context *ctx = event->ctx;
	struct task_struct *task = ctx->task;

	if (!task) {
		/* CPU context: enable the event on its own CPU. */
		cpu_function_call(event->cpu, __perf_event_enable, event);
		return;
	}

	raw_spin_lock_irq(&ctx->lock);
	/* Already enabled (INACTIVE or above): nothing to do. */
	if (event->state >= PERF_EVENT_STATE_INACTIVE)
		goto out;

	/*
	 * An event in the ERROR state is reset to OFF so that the enable
	 * below is attempted again from a clean state.
	 */
	if (event->state == PERF_EVENT_STATE_ERROR)
		event->state = PERF_EVENT_STATE_OFF;

retry:
	if (!ctx->is_active) {
		/* Context not active: mark the event enabled, lock is held. */
		__perf_event_mark_enabled(event);
		goto out;
	}

	/* The cross-call must be made without ctx->lock held. */
	raw_spin_unlock_irq(&ctx->lock);

	/* A zero return means the callback ran on the task's CPU. */
	if (!task_function_call(task, __perf_event_enable, event))
		return;

	raw_spin_lock_irq(&ctx->lock);

	/*
	 * The cross-call did not run.  If the context is active and the event
	 * is still OFF, re-read ctx->task and try again.
	 */
	if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
		task = ctx->task;
		goto retry;
	}

out:
	raw_spin_unlock_irq(&ctx->lock);
}
EXPORT_SYMBOL_GPL(perf_event_enable);
1837
1838int perf_event_refresh(struct perf_event *event, int refresh)
1839{
1840
1841
1842
1843 if (event->attr.inherit || !is_sampling_event(event))
1844 return -EINVAL;
1845
1846 atomic_add(refresh, &event->event_limit);
1847 perf_event_enable(event);
1848
1849 return 0;
1850}
1851EXPORT_SYMBOL_GPL(perf_event_refresh);
1852
1853static void ctx_sched_out(struct perf_event_context *ctx,
1854 struct perf_cpu_context *cpuctx,
1855 enum event_type_t event_type)
1856{
1857 struct perf_event *event;
1858 int is_active = ctx->is_active;
1859
1860 ctx->is_active &= ~event_type;
1861 if (likely(!ctx->nr_events))
1862 return;
1863
1864 update_context_time(ctx);
1865 update_cgrp_time_from_cpuctx(cpuctx);
1866 if (!ctx->nr_active)
1867 return;
1868
1869 perf_pmu_disable(ctx->pmu);
1870 if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
1871 list_for_each_entry(event, &ctx->pinned_groups, group_entry)
1872 group_sched_out(event, cpuctx, ctx);
1873 }
1874
1875 if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
1876 list_for_each_entry(event, &ctx->flexible_groups, group_entry)
1877 group_sched_out(event, cpuctx, ctx);
1878 }
1879 perf_pmu_enable(ctx->pmu);
1880}
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893static int context_equiv(struct perf_event_context *ctx1,
1894 struct perf_event_context *ctx2)
1895{
1896 return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
1897 && ctx1->parent_gen == ctx2->parent_gen
1898 && !ctx1->pin_count && !ctx2->pin_count;
1899}
1900
1901static void __perf_event_sync_stat(struct perf_event *event,
1902 struct perf_event *next_event)
1903{
1904 u64 value;
1905
1906 if (!event->attr.inherit_stat)
1907 return;
1908
1909
1910
1911
1912
1913
1914
1915
1916 switch (event->state) {
1917 case PERF_EVENT_STATE_ACTIVE:
1918 event->pmu->read(event);
1919
1920
1921 case PERF_EVENT_STATE_INACTIVE:
1922 update_event_times(event);
1923 break;
1924
1925 default:
1926 break;
1927 }
1928
1929
1930
1931
1932
1933 value = local64_read(&next_event->count);
1934 value = local64_xchg(&event->count, value);
1935 local64_set(&next_event->count, value);
1936
1937 swap(event->total_time_enabled, next_event->total_time_enabled);
1938 swap(event->total_time_running, next_event->total_time_running);
1939
1940
1941
1942
1943 perf_event_update_userpage(event);
1944 perf_event_update_userpage(next_event);
1945}
1946
/*
 * list_next_entry - get the element following @pos in its list
 * @pos:	pointer to the current element
 * @member:	name of the struct list_head field inside the element
 *
 * @pos is parenthesised so that any pointer-valued expression (for example
 * "&obj" or "base + i") can be passed, not just a plain identifier.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)
1949
1950static void perf_event_sync_stat(struct perf_event_context *ctx,
1951 struct perf_event_context *next_ctx)
1952{
1953 struct perf_event *event, *next_event;
1954
1955 if (!ctx->nr_stat)
1956 return;
1957
1958 update_context_time(ctx);
1959
1960 event = list_first_entry(&ctx->event_list,
1961 struct perf_event, event_entry);
1962
1963 next_event = list_first_entry(&next_ctx->event_list,
1964 struct perf_event, event_entry);
1965
1966 while (&event->event_entry != &ctx->event_list &&
1967 &next_event->event_entry != &next_ctx->event_list) {
1968
1969 __perf_event_sync_stat(event, next_event);
1970
1971 event = list_next_entry(event, event_entry);
1972 next_event = list_next_entry(next_event, event_entry);
1973 }
1974}
1975
/*
 * Schedule out context number @ctxn of @task as the CPU switches to @next.
 *
 * When the outgoing and incoming contexts are equivalent (see
 * context_equiv()), the two tasks simply exchange contexts and nothing is
 * taken off the PMU.  Otherwise the outgoing context is scheduled out and
 * the CPU's task context is cleared.
 */
static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
					 struct task_struct *next)
{
	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
	struct perf_event_context *next_ctx;
	struct perf_event_context *parent;
	struct perf_cpu_context *cpuctx;
	int do_switch = 1;

	if (likely(!ctx))
		return;

	cpuctx = __get_cpu_context(ctx);
	/* No task context installed on this CPU: nothing to schedule out. */
	if (!cpuctx->task_ctx)
		return;

	rcu_read_lock();
	parent = rcu_dereference(ctx->parent_ctx);
	next_ctx = next->perf_event_ctxp[ctxn];
	if (parent && next_ctx &&
	    rcu_dereference(next_ctx->parent_ctx) == parent) {
		/*
		 * Both contexts share a parent, so they may be equivalent.
		 * Take both locks (the second with a nesting annotation, as
		 * both are of the same lock class) and re-check under them.
		 */
		raw_spin_lock(&ctx->lock);
		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
		if (context_equiv(ctx, next_ctx)) {
			/*
			 * Exchange the contexts between the two tasks instead
			 * of scheduling one out and the other in.
			 */
			task->perf_event_ctxp[ctxn] = next_ctx;
			next->perf_event_ctxp[ctxn] = ctx;
			ctx->task = next;
			next_ctx->task = task;
			do_switch = 0;

			perf_event_sync_stat(ctx, next_ctx);
		}
		raw_spin_unlock(&next_ctx->lock);
		raw_spin_unlock(&ctx->lock);
	}
	rcu_read_unlock();

	if (do_switch) {
		/* No swap was possible: really schedule the context out. */
		raw_spin_lock(&ctx->lock);
		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
		cpuctx->task_ctx = NULL;
		raw_spin_unlock(&ctx->lock);
	}
}
2033
/* Iterate @ctxn over every task context index: 0 .. perf_nr_task_contexts - 1. */
#define for_each_task_context_nr(ctxn)					\
	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048void __perf_event_task_sched_out(struct task_struct *task,
2049 struct task_struct *next)
2050{
2051 int ctxn;
2052
2053 for_each_task_context_nr(ctxn)
2054 perf_event_context_sched_out(task, ctxn, next);
2055
2056
2057
2058
2059
2060
2061 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
2062 perf_cgroup_sched_out(task, next);
2063}
2064
2065static void task_ctx_sched_out(struct perf_event_context *ctx)
2066{
2067 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2068
2069 if (!cpuctx->task_ctx)
2070 return;
2071
2072 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
2073 return;
2074
2075 ctx_sched_out(ctx, cpuctx, EVENT_ALL);
2076 cpuctx->task_ctx = NULL;
2077}
2078
2079
2080
2081
2082static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
2083 enum event_type_t event_type)
2084{
2085 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
2086}
2087
2088static void
2089ctx_pinned_sched_in(struct perf_event_context *ctx,
2090 struct perf_cpu_context *cpuctx)
2091{
2092 struct perf_event *event;
2093
2094 list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
2095 if (event->state <= PERF_EVENT_STATE_OFF)
2096 continue;
2097 if (!event_filter_match(event))
2098 continue;
2099
2100
2101 if (is_cgroup_event(event))
2102 perf_cgroup_mark_enabled(event, ctx);
2103
2104 if (group_can_go_on(event, cpuctx, 1))
2105 group_sched_in(event, cpuctx, ctx);
2106
2107
2108
2109
2110
2111 if (event->state == PERF_EVENT_STATE_INACTIVE) {
2112 update_group_times(event);
2113 event->state = PERF_EVENT_STATE_ERROR;
2114 }
2115 }
2116}
2117
2118static void
2119ctx_flexible_sched_in(struct perf_event_context *ctx,
2120 struct perf_cpu_context *cpuctx)
2121{
2122 struct perf_event *event;
2123 int can_add_hw = 1;
2124
2125 list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
2126
2127 if (event->state <= PERF_EVENT_STATE_OFF)
2128 continue;
2129
2130
2131
2132
2133 if (!event_filter_match(event))
2134 continue;
2135
2136
2137 if (is_cgroup_event(event))
2138 perf_cgroup_mark_enabled(event, ctx);
2139
2140 if (group_can_go_on(event, cpuctx, can_add_hw)) {
2141 if (group_sched_in(event, cpuctx, ctx))
2142 can_add_hw = 0;
2143 }
2144 }
2145}
2146
2147static void
2148ctx_sched_in(struct perf_event_context *ctx,
2149 struct perf_cpu_context *cpuctx,
2150 enum event_type_t event_type,
2151 struct task_struct *task)
2152{
2153 u64 now;
2154 int is_active = ctx->is_active;
2155
2156 ctx->is_active |= event_type;
2157 if (likely(!ctx->nr_events))
2158 return;
2159
2160 now = perf_clock();
2161 ctx->timestamp = now;
2162 perf_cgroup_set_timestamp(task, ctx);
2163
2164
2165
2166
2167 if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
2168 ctx_pinned_sched_in(ctx, cpuctx);
2169
2170
2171 if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
2172 ctx_flexible_sched_in(ctx, cpuctx);
2173}
2174
2175static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
2176 enum event_type_t event_type,
2177 struct task_struct *task)
2178{
2179 struct perf_event_context *ctx = &cpuctx->ctx;
2180
2181 ctx_sched_in(ctx, cpuctx, event_type, task);
2182}
2183
/*
 * Schedule task context @ctx in on this CPU on behalf of @task.
 *
 * Does nothing when @ctx is already the installed task context.
 */
static void perf_event_context_sched_in(struct perf_event_context *ctx,
					struct task_struct *task)
{
	struct perf_cpu_context *cpuctx;

	cpuctx = __get_cpu_context(ctx);
	if (cpuctx->task_ctx == ctx)
		return;

	perf_ctx_lock(cpuctx, ctx);
	perf_pmu_disable(ctx->pmu);

	/*
	 * Take the CPU's flexible groups off first so that
	 * perf_event_sched_in() below can schedule pinned groups (CPU and
	 * task) ahead of any flexible group.
	 */
	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);

	/* Only install the context as the task context if it has events. */
	if (ctx->nr_events)
		cpuctx->task_ctx = ctx;

	perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);

	perf_pmu_enable(ctx->pmu);
	perf_ctx_unlock(cpuctx, ctx);

	/*
	 * Done after the locks are dropped.
	 * NOTE(review): presumably (re)arms this CPU's rotation handling for
	 * the PMU -- confirm against perf_pmu_rotate_start().
	 */
	perf_pmu_rotate_start(ctx->pmu);
}
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
/*
 * On a switch from @prev to a different @task, call the
 * flush_branch_stack() callback of every PMU whose CPU context on this CPU
 * has branch-stack events (nr_branch_stack > 0) and that provides the
 * callback.
 *
 * Runs with interrupts disabled and under rcu_read_lock() while walking the
 * PMU list.
 */
static void perf_branch_stack_sched_in(struct task_struct *prev,
				       struct task_struct *task)
{
	struct perf_cpu_context *cpuctx;
	struct pmu *pmu;
	unsigned long flags;

	/* Same task on both sides: nothing to flush. */
	if (prev == task)
		return;

	local_irq_save(flags);

	rcu_read_lock();

	list_for_each_entry_rcu(pmu, &pmus, entry) {
		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

		/*
		 * Only PMUs with branch-stack users on this CPU and a flush
		 * callback need any work.
		 */
		if (cpuctx->ctx.nr_branch_stack > 0
		    && pmu->flush_branch_stack) {

			/*
			 * NOTE(review): this overwrites the loop cursor with
			 * the PMU recorded in the CPU context, so the list
			 * walk continues from that PMU's entry -- confirm this
			 * is intended.
			 */
			pmu = cpuctx->ctx.pmu;

			perf_ctx_lock(cpuctx, cpuctx->task_ctx);

			perf_pmu_disable(pmu);

			pmu->flush_branch_stack();

			perf_pmu_enable(pmu);

			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
		}
	}

	rcu_read_unlock();

	local_irq_restore(flags);
}
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288void __perf_event_task_sched_in(struct task_struct *prev,
2289 struct task_struct *task)
2290{
2291 struct perf_event_context *ctx;
2292 int ctxn;
2293
2294 for_each_task_context_nr(ctxn) {
2295 ctx = task->perf_event_ctxp[ctxn];
2296 if (likely(!ctx))
2297 continue;
2298
2299 perf_event_context_sched_in(ctx, task);
2300 }
2301
2302
2303
2304
2305
2306 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
2307 perf_cgroup_sched_in(prev, task);
2308
2309
2310 if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
2311 perf_branch_stack_sched_in(prev, task);
2312}
2313
/*
 * Compute the sample period that matches event->attr.sample_freq, given
 * that @count events were observed over @nsec nanoseconds:
 *
 *	period = (count * NSEC_PER_SEC) / (nsec * sample_freq)
 *
 * Both products must fit in 64 bits.  The *_fls variables track the bit
 * length of each operand; operands are shifted right (dropping low-order
 * bits) until the combined bit length of each product is at most 64.
 *
 * Returns the computed period.  If the divisor ends up as 0, the dividend
 * is returned as is.
 */
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
{
	u64 frequency = event->attr.sample_freq;
	u64 sec = NSEC_PER_SEC;
	u64 divisor, dividend;

	int count_fls, nsec_fls, frequency_fls, sec_fls;

	count_fls = fls64(count);
	nsec_fls = fls64(nsec);
	frequency_fls = fls64(frequency);
	/* NOTE(review): hard-coded bit length of NSEC_PER_SEC; presumably fls64(10^9) == 30 -- confirm. */
	sec_fls = 30;

/*
 * Halve whichever of @a and @b currently has the larger bit length (@b on a
 * tie) and adjust its tracked bit length to match.
 */
#define REDUCE_FLS(a, b)		\
do {					\
	if (a##_fls > b##_fls) {	\
		a >>= 1;		\
		a##_fls--;		\
	} else {			\
		b >>= 1;		\
		b##_fls--;		\
	}				\
} while (0)

	/*
	 * While both products would overflow, drop one bit from the divisor
	 * side and one from the dividend side, keeping the ratio roughly
	 * unchanged.
	 */
	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
		REDUCE_FLS(nsec, frequency);
		REDUCE_FLS(sec, count);
	}

	if (count_fls + sec_fls > 64) {
		/*
		 * Only the dividend still overflows: form the divisor first
		 * and halve it once for every bit dropped from the dividend.
		 */
		divisor = nsec * frequency;

		while (count_fls + sec_fls > 64) {
			REDUCE_FLS(count, sec);
			divisor >>= 1;
		}

		dividend = count * sec;
	} else {
		/*
		 * The dividend fits: form it first and halve it once for every
		 * bit dropped from the divisor operands.
		 */
		dividend = count * sec;

		while (nsec_fls + frequency_fls > 64) {
			REDUCE_FLS(nsec, frequency);
			dividend >>= 1;
		}

		divisor = nsec * frequency;
	}

	/* Avoid dividing by zero. */
	if (!divisor)
		return dividend;

	return div64_u64(dividend, divisor);
}
2386
/*
 * Per-CPU throttling state used by perf_event_task_tick():
 * perf_throttled_count is fetched and reset to 0 on every tick and passed on
 * as the "needs unthrottling" flag; perf_throttled_seq is incremented once
 * per tick.
 */
static DEFINE_PER_CPU(int, perf_throttled_count);
static DEFINE_PER_CPU(u64, perf_throttled_seq);
2389
2390static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
2391{
2392 struct hw_perf_event *hwc = &event->hw;
2393 s64 period, sample_period;
2394 s64 delta;
2395
2396 period = perf_calculate_period(event, nsec, count);
2397
2398 delta = (s64)(period - hwc->sample_period);
2399 delta = (delta + 7) / 8;
2400
2401 sample_period = hwc->sample_period + delta;
2402
2403 if (!sample_period)
2404 sample_period = 1;
2405
2406 hwc->sample_period = sample_period;
2407
2408 if (local64_read(&hwc->period_left) > 8*sample_period) {
2409 if (disable)
2410 event->pmu->stop(event, PERF_EF_UPDATE);
2411
2412 local64_set(&hwc->period_left, 0);
2413
2414 if (disable)
2415 event->pmu->start(event, PERF_EF_RELOAD);
2416 }
2417}
2418
2419
2420
2421
2422
2423
/*
 * Per-tick maintenance of one context: restart events whose interrupt count
 * reached MAX_INTERRUPTS (only when @needs_unthr is non-zero) and re-tune
 * the sample period of frequency-based events.
 *
 * Does nothing when the context has no frequency events (ctx->nr_freq == 0)
 * and no unthrottling was requested.
 */
static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
					   int needs_unthr)
{
	struct perf_event *event;
	struct hw_perf_event *hwc;
	u64 now, period = TICK_NSEC;
	s64 delta;

	/* Neither frequency adjustment nor unthrottling is needed. */
	if (!(ctx->nr_freq || needs_unthr))
		return;

	raw_spin_lock(&ctx->lock);
	perf_pmu_disable(ctx->pmu);

	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
		/* Only events currently on the PMU are considered. */
		if (event->state != PERF_EVENT_STATE_ACTIVE)
			continue;

		if (!event_filter_match(event))
			continue;

		hwc = &event->hw;

		/*
		 * NOTE(review): interrupts == MAX_INTERRUPTS appears to mark a
		 * throttled event; reset the count, log the unthrottle and
		 * start the event again.
		 */
		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
			hwc->interrupts = 0;
			perf_log_throttle(event, 1);
			event->pmu->start(event, 0);
		}

		/* The rest applies to frequency-based events only. */
		if (!event->attr.freq || !event->attr.sample_freq)
			continue;

		/* Stop the event, asking the PMU to update its count first. */
		event->pmu->stop(event, PERF_EF_UPDATE);

		now = local64_read(&event->count);
		delta = now - hwc->freq_count_stamp;
		hwc->freq_count_stamp = now;

		/*
		 * delta is the number of events counted since the previous
		 * stamp.  The period is only re-tuned when something was
		 * counted; perf_adjust_period() is told not to stop/start the
		 * event itself because that is done around it here.
		 */
		if (delta > 0)
			perf_adjust_period(event, period, delta, false);

		/* Restart; reload the period only if it may have changed. */
		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
	}

	perf_pmu_enable(ctx->pmu);
	raw_spin_unlock(&ctx->lock);
}
2486
2487
2488
2489
2490static void rotate_ctx(struct perf_event_context *ctx)
2491{
2492
2493
2494
2495
2496 if (!ctx->rotate_disable)
2497 list_rotate_left(&ctx->flexible_groups);
2498}
2499
2500
2501
2502
2503
2504
/*
 * Rotate the flexible groups of the CPU context and of the installed task
 * context (if any).
 *
 * Rotation only happens when at least one of the two contexts has events
 * that are not all active.  When neither context has any events at all, the
 * CPU context is removed from the rotation list.
 */
static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
	struct perf_event_context *ctx = NULL;
	int rotate = 0, remove = 1;

	if (cpuctx->ctx.nr_events) {
		remove = 0;
		/* Not every event is active: rotating may give others a turn. */
		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
			rotate = 1;
	}

	ctx = cpuctx->task_ctx;
	if (ctx && ctx->nr_events) {
		remove = 0;
		if (ctx->nr_events != ctx->nr_active)
			rotate = 1;
	}

	if (!rotate)
		goto done;

	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
	perf_pmu_disable(cpuctx->ctx.pmu);

	/* Take the flexible groups of both contexts off the PMU ... */
	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
	if (ctx)
		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);

	/* ... rotate their lists ... */
	rotate_ctx(&cpuctx->ctx);
	if (ctx)
		rotate_ctx(ctx);

	/* ... and schedule everything back in, in the new order. */
	perf_event_sched_in(cpuctx, ctx, current);

	perf_pmu_enable(cpuctx->ctx.pmu);
	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
done:
	/* No events left in either context: stop visiting this CPU context. */
	if (remove)
		list_del_init(&cpuctx->rotation_list);
}
2545
2546void perf_event_task_tick(void)
2547{
2548 struct list_head *head = &__get_cpu_var(rotation_list);
2549 struct perf_cpu_context *cpuctx, *tmp;
2550 struct perf_event_context *ctx;
2551 int throttled;
2552
2553 WARN_ON(!irqs_disabled());
2554
2555 __this_cpu_inc(perf_throttled_seq);
2556 throttled = __this_cpu_xchg(perf_throttled_count, 0);
2557
2558 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
2559 ctx = &cpuctx->ctx;
2560 perf_adjust_freq_unthr_context(ctx, throttled);
2561
2562 ctx = cpuctx->task_ctx;
2563 if (ctx)
2564 perf_adjust_freq_unthr_context(ctx, throttled);
2565
2566 if (cpuctx->jiffies_interval == 1 ||
2567 !(jiffies % cpuctx->jiffies_interval))
2568 perf_rotate_context(cpuctx);
2569 }
2570}
2571
2572static int event_enable_on_exec(struct perf_event *event,
2573 struct perf_event_context *ctx)
2574{
2575 if (!event->attr.enable_on_exec)
2576 return 0;
2577
2578 event->attr.enable_on_exec = 0;
2579 if (event->state >= PERF_EVENT_STATE_INACTIVE)
2580 return 0;
2581
2582 __perf_event_mark_enabled(event);
2583
2584 return 1;
2585}
2586
2587
2588
2589
2590
/*
 * Enable every event of @ctx that has the enable_on_exec attribute set, then
 * schedule the context back in.
 *
 * Runs with local interrupts disabled.  Does nothing for a NULL or empty
 * context.
 */
static void perf_event_enable_on_exec(struct perf_event_context *ctx)
{
	struct perf_event *event;
	unsigned long flags;
	int enabled = 0;
	int ret;

	local_irq_save(flags);
	if (!ctx || !ctx->nr_events)
		goto out;

	/*
	 * Switch the cgroup side out for the current task before the task
	 * context is scheduled out.
	 * NOTE(review): presumably pairs with the cgroup switch-in performed
	 * when the context is scheduled back in below -- confirm.
	 */
	perf_cgroup_sched_out(current, NULL);

	raw_spin_lock(&ctx->lock);
	task_ctx_sched_out(ctx);

	list_for_each_entry(event, &ctx->event_list, event_entry) {
		ret = event_enable_on_exec(event, ctx);
		if (ret)
			enabled = 1;
	}

	/* At least one event changed state: unclone the context. */
	if (enabled)
		unclone_ctx(ctx);

	raw_spin_unlock(&ctx->lock);

	/* Schedule the context back in, now including the enabled events. */
	perf_event_context_sched_in(ctx, ctx->task);
out:
	local_irq_restore(flags);
}
2635
2636
2637
2638
2639static void __perf_event_read(void *info)
2640{
2641 struct perf_event *event = info;
2642 struct perf_event_context *ctx = event->ctx;
2643 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2644
2645
2646
2647
2648
2649
2650
2651
2652 if (ctx->task && cpuctx->task_ctx != ctx)
2653 return;
2654
2655 raw_spin_lock(&ctx->lock);
2656 if (ctx->is_active) {
2657 update_context_time(ctx);
2658 update_cgrp_time_from_event(event);
2659 }
2660 update_event_times(event);
2661 if (event->state == PERF_EVENT_STATE_ACTIVE)
2662 event->pmu->read(event);
2663 raw_spin_unlock(&ctx->lock);
2664}
2665
2666static inline u64 perf_event_count(struct perf_event *event)
2667{
2668 return local64_read(&event->count) + atomic64_read(&event->child_count);
2669}
2670
/*
 * Bring @event up to date and return its total count (own plus children).
 *
 * An ACTIVE event is refreshed on the CPU it is running on; an INACTIVE
 * event only needs its times updated, which is done locally under
 * ctx->lock.  Events in any other state are returned as they are.
 */
static u64 perf_event_read(struct perf_event *event)
{
	/*
	 * The event is on a PMU: have its CPU read it and wait for the
	 * cross-call to complete.
	 */
	if (event->state == PERF_EVENT_STATE_ACTIVE) {
		smp_call_function_single(event->oncpu,
					 __perf_event_read, event, 1);
	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
		struct perf_event_context *ctx = event->ctx;
		unsigned long flags;

		raw_spin_lock_irqsave(&ctx->lock, flags);
		/* The context clocks only advance while the context is active. */
		if (ctx->is_active) {
			update_context_time(ctx);
			update_cgrp_time_from_event(event);
		}
		update_event_times(event);
		raw_spin_unlock_irqrestore(&ctx->lock, flags);
	}

	return perf_event_count(event);
}
2700
2701
2702
2703
2704static void __perf_event_init_context(struct perf_event_context *ctx)
2705{
2706 raw_spin_lock_init(&ctx->lock);
2707 mutex_init(&ctx->mutex);
2708 INIT_LIST_HEAD(&ctx->pinned_groups);
2709 INIT_LIST_HEAD(&ctx->flexible_groups);
2710 INIT_LIST_HEAD(&ctx->event_list);
2711 atomic_set(&ctx->refcount, 1);
2712}
2713
2714static struct perf_event_context *
2715alloc_perf_context(struct pmu *pmu, struct task_struct *task)
2716{
2717 struct perf_event_context *ctx;
2718
2719 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
2720 if (!ctx)
2721 return NULL;
2722
2723 __perf_event_init_context(ctx);
2724 if (task) {
2725 ctx->task = task;
2726 get_task_struct(task);
2727 }
2728 ctx->pmu = pmu;
2729
2730 return ctx;
2731}
2732
2733static struct task_struct *
2734find_lively_task_by_vpid(pid_t vpid)
2735{
2736 struct task_struct *task;
2737 int err;
2738
2739 rcu_read_lock();
2740 if (!vpid)
2741 task = current;
2742 else
2743 task = find_task_by_vpid(vpid);
2744 if (task)
2745 get_task_struct(task);
2746 rcu_read_unlock();
2747
2748 if (!task)
2749 return ERR_PTR(-ESRCH);
2750
2751
2752 err = -EACCES;
2753 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2754 goto errout;
2755
2756 return task;
2757errout:
2758 put_task_struct(task);
2759 return ERR_PTR(err);
2760
2761}
2762
2763
2764
2765
/*
 * Return the context events for @pmu should be attached to, with its
 * pin_count raised: the per-CPU context of @cpu when @task is NULL,
 * otherwise the task's context for this PMU (allocated on first use).
 *
 * Returns the context or an ERR_PTR():
 *   -EACCES  CPU events requested without sufficient privilege
 *   -ENODEV  @cpu is not online
 *   -EINVAL  the PMU has no task context (task_ctx_nr < 0)
 *   -ENOMEM  context allocation failed
 *   -ESRCH   the task is exiting
 */
static struct perf_event_context *
find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
{
	struct perf_event_context *ctx;
	struct perf_cpu_context *cpuctx;
	unsigned long flags;
	int ctxn, err;

	if (!task) {
		/* Per-CPU events may be restricted to CAP_SYS_ADMIN. */
		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
			return ERR_PTR(-EACCES);

		/* Only online CPUs can carry events. */
		if (!cpu_online(cpu))
			return ERR_PTR(-ENODEV);

		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
		ctx = &cpuctx->ctx;
		get_ctx(ctx);
		++ctx->pin_count;

		return ctx;
	}

	err = -EINVAL;
	ctxn = pmu->task_ctx_nr;
	if (ctxn < 0)
		goto errout;

retry:
	ctx = perf_lock_task_context(task, ctxn, &flags);
	if (ctx) {
		/* Existing context, returned locked: unclone and pin it. */
		unclone_ctx(ctx);
		++ctx->pin_count;
		raw_spin_unlock_irqrestore(&ctx->lock, flags);
	} else {
		/* The task has no context for this PMU yet: create one. */
		ctx = alloc_perf_context(pmu, task);
		err = -ENOMEM;
		if (!ctx)
			goto errout;

		err = 0;
		mutex_lock(&task->perf_event_mutex);
		/*
		 * Install the new context unless the task is exiting or
		 * another context was installed in the meantime.
		 */
		if (task->flags & PF_EXITING)
			err = -ESRCH;
		else if (task->perf_event_ctxp[ctxn])
			err = -EAGAIN;
		else {
			get_ctx(ctx);
			++ctx->pin_count;
			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
		}
		mutex_unlock(&task->perf_event_mutex);

		if (unlikely(err)) {
			/* Drop the context that was just allocated. */
			put_ctx(ctx);

			/* Lost the race: go and use the context that won. */
			if (err == -EAGAIN)
				goto retry;
			goto errout;
		}
	}

	return ctx;

errout:
	return ERR_PTR(err);
}
2843
/* Forward declaration; needed by free_event_rcu() below. */
static void perf_event_free_filter(struct perf_event *event);
2845
2846static void free_event_rcu(struct rcu_head *head)
2847{
2848 struct perf_event *event;
2849
2850 event = container_of(head, struct perf_event, rcu_head);
2851 if (event->ns)
2852 put_pid_ns(event->ns);
2853 perf_event_free_filter(event);
2854 kfree(event);
2855}
2856
/* Forward declaration; needed by free_event() below. */
static void ring_buffer_put(struct ring_buffer *rb);
2858
/*
 * Tear down @event: undo the global accounting it contributed to, release
 * its ring buffer, cgroup and context references, run its destroy hook and
 * free the memory after an RCU grace period.
 */
static void free_event(struct perf_event *event)
{
	/* Wait for any pending irq work of this event to finish. */
	irq_work_sync(&event->pending);

	/*
	 * Global counters are only adjusted for events without a parent.
	 * NOTE(review): presumably these mirror increments made when the
	 * event was created -- confirm against the allocation path.
	 */
	if (!event->parent) {
		if (event->attach_state & PERF_ATTACH_TASK)
			static_key_slow_dec_deferred(&perf_sched_events);
		if (event->attr.mmap || event->attr.mmap_data)
			atomic_dec(&nr_mmap_events);
		if (event->attr.comm)
			atomic_dec(&nr_comm_events);
		if (event->attr.task)
			atomic_dec(&nr_task_events);
		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
			put_callchain_buffers();
		if (is_cgroup_event(event)) {
			atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
			static_key_slow_dec_deferred(&perf_sched_events);
		}

		if (has_branch_stack(event)) {
			static_key_slow_dec_deferred(&perf_sched_events);
			/* The per-CPU count only covers events not attached to a task. */
			if (!(event->attach_state & PERF_ATTACH_TASK))
				atomic_dec(&per_cpu(perf_branch_stack_events,
						    event->cpu));
		}
	}

	if (event->rb) {
		ring_buffer_put(event->rb);
		event->rb = NULL;
	}

	if (is_cgroup_event(event))
		perf_detach_cgroup(event);

	if (event->destroy)
		event->destroy(event);

	if (event->ctx)
		put_ctx(event->ctx);

	/* The memory itself is freed from free_event_rcu(). */
	call_rcu(&event->rcu_head, free_event_rcu);
}
2904
/*
 * Detach @event from its group and context and free it.
 *
 * Warns once if the event's context still has a parent context.
 * Always returns 0.
 */
int perf_event_release_kernel(struct perf_event *event)
{
	struct perf_event_context *ctx = event->ctx;

	WARN_ON_ONCE(ctx->parent_ctx);
	/*
	 * ctx->mutex is taken with a nesting annotation.
	 * NOTE(review): presumably because a caller may already hold another
	 * context's mutex -- confirm which call path needs this.
	 */
	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
	/* Group detach is done under ctx->lock with interrupts off. */
	raw_spin_lock_irq(&ctx->lock);
	perf_group_detach(event);
	raw_spin_unlock_irq(&ctx->lock);
	perf_remove_from_context(event);
	mutex_unlock(&ctx->mutex);

	free_event(event);

	return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
2934
2935
2936
2937
/*
 * Drop one reference on @event.  When the last reference goes away the
 * event is unlinked from its owner's list (if it still has an owner) and
 * released.
 */
static void put_event(struct perf_event *event)
{
	struct task_struct *owner;

	/* Not the last reference: nothing more to do. */
	if (!atomic_long_dec_and_test(&event->refcount))
		return;

	rcu_read_lock();
	owner = ACCESS_ONCE(event->owner);
	/*
	 * Order the load of event->owner before any dependent access to the
	 * task it points to.
	 */
	smp_read_barrier_depends();
	if (owner) {
		/*
		 * Pin the owner task while still inside the RCU read-side
		 * section so it can be used safely after rcu_read_unlock().
		 */
		get_task_struct(owner);
	}
	rcu_read_unlock();

	if (owner) {
		mutex_lock(&owner->perf_event_mutex);
		/*
		 * Re-check under the owner's mutex: event->owner may have been
		 * cleared since it was sampled above, in which case the event
		 * is not unlinked here.
		 */
		if (event->owner)
			list_del_init(&event->owner_entry);
		mutex_unlock(&owner->perf_event_mutex);
		put_task_struct(owner);
	}

	perf_event_release_kernel(event);
}
2980
2981static int perf_release(struct inode *inode, struct file *file)
2982{
2983 put_event(file->private_data);
2984 return 0;
2985}
2986
2987u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
2988{
2989 struct perf_event *child;
2990 u64 total = 0;
2991
2992 *enabled = 0;
2993 *running = 0;
2994
2995 mutex_lock(&event->child_mutex);
2996 total += perf_event_read(event);
2997 *enabled += event->total_time_enabled +
2998 atomic64_read(&event->child_total_time_enabled);
2999 *running += event->total_time_running +
3000 atomic64_read(&event->child_total_time_running);
3001
3002 list_for_each_entry(child, &event->child_list, child_list) {
3003 total += perf_event_read(child);
3004 *enabled += child->total_time_enabled;
3005 *running += child->total_time_running;
3006 }
3007 mutex_unlock(&event->child_mutex);
3008
3009 return total;
3010}
3011EXPORT_SYMBOL_GPL(perf_event_read_value);
3012
/*
 * Copy the values of the whole group @event belongs to into the user
 * buffer @buf.
 *
 * Layout: a header for the leader (number of events, optional enabled time,
 * optional running time, leader count, optional leader id) followed by one
 * record per sibling (count, optional id).  The optional fields are
 * selected by @read_format.
 *
 * Returns the number of bytes written, or -EFAULT if a copy to user space
 * failed.
 */
static int perf_event_read_group(struct perf_event *event,
				   u64 read_format, char __user *buf)
{
	struct perf_event *leader = event->group_leader, *sub;
	int n = 0, size = 0, ret = -EFAULT;
	struct perf_event_context *ctx = leader->ctx;
	u64 values[5];
	u64 count, enabled, running;

	mutex_lock(&ctx->mutex);
	count = perf_event_read_value(leader, &enabled, &running);

	/* Leader header: at most five u64 values. */
	values[n++] = 1 + leader->nr_siblings;
	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		values[n++] = enabled;
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		values[n++] = running;
	values[n++] = count;
	if (read_format & PERF_FORMAT_ID)
		values[n++] = primary_event_id(leader);

	size = n * sizeof(u64);

	/* ret still holds -EFAULT if this first copy fails. */
	if (copy_to_user(buf, values, size))
		goto unlock;

	ret = size;

	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
		n = 0;

		/* The sibling's own times are read but not reported. */
		values[n++] = perf_event_read_value(sub, &enabled, &running);
		if (read_format & PERF_FORMAT_ID)
			values[n++] = primary_event_id(sub);

		size = n * sizeof(u64);

		/* ret doubles as the running offset into the user buffer. */
		if (copy_to_user(buf + ret, values, size)) {
			ret = -EFAULT;
			goto unlock;
		}

		ret += size;
	}
unlock:
	mutex_unlock(&ctx->mutex);

	return ret;
}
3062
3063static int perf_event_read_one(struct perf_event *event,
3064 u64 read_format, char __user *buf)
3065{
3066 u64 enabled, running;
3067 u64 values[4];
3068 int n = 0;
3069
3070 values[n++] = perf_event_read_value(event, &enabled, &running);
3071 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
3072 values[n++] = enabled;
3073 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
3074 values[n++] = running;
3075 if (read_format & PERF_FORMAT_ID)
3076 values[n++] = primary_event_id(event);
3077
3078 if (copy_to_user(buf, values, n * sizeof(u64)))
3079 return -EFAULT;
3080
3081 return n * sizeof(u64);
3082}
3083
3084
3085
3086
3087static ssize_t
3088perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
3089{
3090 u64 read_format = event->attr.read_format;
3091 int ret;
3092
3093
3094
3095
3096
3097
3098 if (event->state == PERF_EVENT_STATE_ERROR)
3099 return 0;
3100
3101 if (count < event->read_size)
3102 return -ENOSPC;
3103
3104 WARN_ON_ONCE(event->ctx->parent_ctx);
3105 if (read_format & PERF_FORMAT_GROUP)
3106 ret = perf_event_read_group(event, read_format, buf);
3107 else
3108 ret = perf_event_read_one(event, read_format, buf);
3109
3110 return ret;
3111}
3112
3113static ssize_t
3114perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
3115{
3116 struct perf_event *event = file->private_data;
3117
3118 return perf_read_hw(event, buf, count);
3119}
3120
3121static unsigned int perf_poll(struct file *file, poll_table *wait)
3122{
3123 struct perf_event *event = file->private_data;
3124 struct ring_buffer *rb;
3125 unsigned int events = POLL_HUP;
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142 mutex_lock(&event->mmap_mutex);
3143
3144 rcu_read_lock();
3145 rb = rcu_dereference(event->rb);
3146 if (rb) {
3147 ring_buffer_attach(event, rb);
3148 events = atomic_xchg(&rb->poll, 0);
3149 }
3150 rcu_read_unlock();
3151
3152 mutex_unlock(&event->mmap_mutex);
3153
3154 poll_wait(file, &event->waitq, wait);
3155
3156 return events;
3157}
3158
3159static void perf_event_reset(struct perf_event *event)
3160{
3161 (void)perf_event_read(event);
3162 local64_set(&event->count, 0);
3163 perf_event_update_userpage(event);
3164}
3165
3166
3167
3168
3169
3170
3171
3172static void perf_event_for_each_child(struct perf_event *event,
3173 void (*func)(struct perf_event *))
3174{
3175 struct perf_event *child;
3176
3177 WARN_ON_ONCE(event->ctx->parent_ctx);
3178 mutex_lock(&event->child_mutex);
3179 func(event);
3180 list_for_each_entry(child, &event->child_list, child_list)
3181 func(child);
3182 mutex_unlock(&event->child_mutex);
3183}
3184
3185static void perf_event_for_each(struct perf_event *event,
3186 void (*func)(struct perf_event *))
3187{
3188 struct perf_event_context *ctx = event->ctx;
3189 struct perf_event *sibling;
3190
3191 WARN_ON_ONCE(ctx->parent_ctx);
3192 mutex_lock(&ctx->mutex);
3193 event = event->group_leader;
3194
3195 perf_event_for_each_child(event, func);
3196 list_for_each_entry(sibling, &event->sibling_list, group_entry)
3197 perf_event_for_each_child(sibling, func);
3198 mutex_unlock(&ctx->mutex);
3199}
3200
3201static int perf_event_period(struct perf_event *event, u64 __user *arg)
3202{
3203 struct perf_event_context *ctx = event->ctx;
3204 int ret = 0;
3205 u64 value;
3206
3207 if (!is_sampling_event(event))
3208 return -EINVAL;
3209
3210 if (copy_from_user(&value, arg, sizeof(value)))
3211 return -EFAULT;
3212
3213 if (!value)
3214 return -EINVAL;
3215
3216 raw_spin_lock_irq(&ctx->lock);
3217 if (event->attr.freq) {
3218 if (value > sysctl_perf_event_sample_rate) {
3219 ret = -EINVAL;
3220 goto unlock;
3221 }
3222
3223 event->attr.sample_freq = value;
3224 } else {
3225 event->attr.sample_period = value;
3226 event->hw.sample_period = value;
3227 }
3228unlock:
3229 raw_spin_unlock_irq(&ctx->lock);
3230
3231 return ret;
3232}
3233
/* Forward declaration; perf_fget_light() below compares f_op against it. */
static const struct file_operations perf_fops;
3235
3236static struct file *perf_fget_light(int fd, int *fput_needed)
3237{
3238 struct file *file;
3239
3240 file = fget_light(fd, fput_needed);
3241 if (!file)
3242 return ERR_PTR(-EBADF);
3243
3244 if (file->f_op != &perf_fops) {
3245 fput_light(file, *fput_needed);
3246 *fput_needed = 0;
3247 return ERR_PTR(-EBADF);
3248 }
3249
3250 return file;
3251}
3252
/* Forward declarations of the helpers used by perf_ioctl() below. */
static int perf_event_set_output(struct perf_event *event,
				 struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
3256
3257static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3258{
3259 struct perf_event *event = file->private_data;
3260 void (*func)(struct perf_event *);
3261 u32 flags = arg;
3262
3263 switch (cmd) {
3264 case PERF_EVENT_IOC_ENABLE:
3265 func = perf_event_enable;
3266 break;
3267 case PERF_EVENT_IOC_DISABLE:
3268 func = perf_event_disable;
3269 break;
3270 case PERF_EVENT_IOC_RESET:
3271 func = perf_event_reset;
3272 break;
3273
3274 case PERF_EVENT_IOC_REFRESH:
3275 return perf_event_refresh(event, arg);
3276
3277 case PERF_EVENT_IOC_PERIOD:
3278 return perf_event_period(event, (u64 __user *)arg);
3279
3280 case PERF_EVENT_IOC_SET_OUTPUT:
3281 {
3282 struct file *output_file = NULL;
3283 struct perf_event *output_event = NULL;
3284 int fput_needed = 0;
3285 int ret;
3286
3287 if (arg != -1) {
3288 output_file = perf_fget_light(arg, &fput_needed);
3289 if (IS_ERR(output_file))
3290 return PTR_ERR(output_file);
3291 output_event = output_file->private_data;
3292 }
3293
3294 ret = perf_event_set_output(event, output_event);
3295 if (output_event)
3296 fput_light(output_file, fput_needed);
3297
3298 return ret;
3299 }
3300
3301 case PERF_EVENT_IOC_SET_FILTER:
3302 return perf_event_set_filter(event, (void __user *)arg);
3303
3304 default:
3305 return -ENOTTY;
3306 }
3307
3308 if (flags & PERF_IOC_FLAG_GROUP)
3309 perf_event_for_each(event, func);
3310 else
3311 perf_event_for_each_child(event, func);
3312
3313 return 0;
3314}
3315
3316int perf_event_task_enable(void)
3317{
3318 struct perf_event *event;
3319
3320 mutex_lock(¤t->perf_event_mutex);
3321 list_for_each_entry(event, ¤t->perf_event_list, owner_entry)
3322 perf_event_for_each_child(event, perf_event_enable);
3323 mutex_unlock(¤t->perf_event_mutex);
3324
3325 return 0;
3326}
3327
3328int perf_event_task_disable(void)
3329{
3330 struct perf_event *event;
3331
3332 mutex_lock(¤t->perf_event_mutex);
3333 list_for_each_entry(event, ¤t->perf_event_list, owner_entry)
3334 perf_event_for_each_child(event, perf_event_disable);
3335 mutex_unlock(¤t->perf_event_mutex);
3336
3337 return 0;
3338}
3339
3340static int perf_event_index(struct perf_event *event)
3341{
3342 if (event->hw.state & PERF_HES_STOPPED)
3343 return 0;
3344
3345 if (event->state != PERF_EVENT_STATE_ACTIVE)
3346 return 0;
3347
3348 return event->pmu->event_idx(event);
3349}
3350
3351static void calc_timer_values(struct perf_event *event,
3352 u64 *now,
3353 u64 *enabled,
3354 u64 *running)
3355{
3356 u64 ctx_time;
3357
3358 *now = perf_clock();
3359 ctx_time = event->shadow_ctx_time + *now;
3360 *enabled = ctx_time - event->tstamp_enabled;
3361 *running = ctx_time - event->tstamp_running;
3362}
3363
/*
 * Weak default that does nothing.  perf_event_update_userpage() calls
 * this hook while it fills in the user page; being __weak, another
 * definition can replace it at link time.
 */
void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
}
3367
3368
3369
3370
3371
3372
/*
 * Refresh the ring buffer's user page (rb->user_page) for @event with
 * the current counter index, count offset and enabled/running times.
 * Does nothing when the event has no ring buffer.
 */
void perf_event_update_userpage(struct perf_event *event)
{
	struct perf_event_mmap_page *userpg;
	struct ring_buffer *rb;
	u64 enabled, running, now;

	rcu_read_lock();
	/*
	 * The time values are computed before event->rb is looked up, and
	 * therefore also when there turns out to be no buffer.
	 * NOTE(review): the reason for this ordering is not visible in this
	 * section - confirm before moving the call.
	 */
	calc_timer_values(event, &now, &enabled, &running);
	rb = rcu_dereference(event->rb);
	if (!rb)
		goto unlock;

	userpg = rb->user_page;

	/*
	 * userpg->lock is incremented once before and once after the
	 * update, with compiler barriers in between and preemption disabled
	 * for the duration.  Presumably readers use the counter to detect a
	 * concurrent update - confirm against the user-space read protocol.
	 */
	preempt_disable();
	++userpg->lock;
	barrier();
	userpg->index = perf_event_index(event);
	userpg->offset = perf_event_count(event);
	/* With a non-zero index, the offset excludes hw.prev_count. */
	if (userpg->index)
		userpg->offset -= local64_read(&event->hw.prev_count);

	/* Both times include the totals accumulated from child events. */
	userpg->time_enabled = enabled +
			atomic64_read(&event->child_total_time_enabled);

	userpg->time_running = running +
			atomic64_read(&event->child_total_time_running);

	/* Hook for additional, architecture-provided user page fields. */
	arch_perf_update_userpage(userpg, now);

	barrier();
	++userpg->lock;
	preempt_enable();
unlock:
	rcu_read_unlock();
}
3422
3423static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
3424{
3425 struct perf_event *event = vma->vm_file->private_data;
3426 struct ring_buffer *rb;
3427 int ret = VM_FAULT_SIGBUS;
3428
3429 if (vmf->flags & FAULT_FLAG_MKWRITE) {
3430 if (vmf->pgoff == 0)
3431 ret = 0;
3432 return ret;
3433 }
3434
3435 rcu_read_lock();
3436 rb = rcu_dereference(event->rb);
3437 if (!rb)
3438 goto unlock;
3439
3440 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
3441 goto unlock;
3442
3443 vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
3444 if (!vmf->page)
3445 goto unlock;
3446
3447 get_page(vmf->page);
3448 vmf->page->mapping = vma->vm_file->f_mapping;
3449 vmf->page->index = vmf->pgoff;
3450
3451 ret = 0;
3452unlock:
3453 rcu_read_unlock();
3454
3455 return ret;
3456}
3457
3458static void ring_buffer_attach(struct perf_event *event,
3459 struct ring_buffer *rb)
3460{
3461 unsigned long flags;
3462
3463 if (!list_empty(&event->rb_entry))
3464 return;
3465
3466 spin_lock_irqsave(&rb->event_lock, flags);
3467 if (!list_empty(&event->rb_entry))
3468 goto unlock;
3469
3470 list_add(&event->rb_entry, &rb->event_list);
3471unlock:
3472 spin_unlock_irqrestore(&rb->event_lock, flags);
3473}
3474
3475static void ring_buffer_detach(struct perf_event *event,
3476 struct ring_buffer *rb)
3477{
3478 unsigned long flags;
3479
3480 if (list_empty(&event->rb_entry))
3481 return;
3482
3483 spin_lock_irqsave(&rb->event_lock, flags);
3484 list_del_init(&event->rb_entry);
3485 wake_up_all(&event->waitq);
3486 spin_unlock_irqrestore(&rb->event_lock, flags);
3487}
3488
3489static void ring_buffer_wakeup(struct perf_event *event)
3490{
3491 struct ring_buffer *rb;
3492
3493 rcu_read_lock();
3494 rb = rcu_dereference(event->rb);
3495 if (!rb)
3496 goto unlock;
3497
3498 list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
3499 wake_up_all(&event->waitq);
3500
3501unlock:
3502 rcu_read_unlock();
3503}
3504
3505static void rb_free_rcu(struct rcu_head *rcu_head)
3506{
3507 struct ring_buffer *rb;
3508
3509 rb = container_of(rcu_head, struct ring_buffer, rcu_head);
3510 rb_free(rb);
3511}
3512
3513static struct ring_buffer *ring_buffer_get(struct perf_event *event)
3514{
3515 struct ring_buffer *rb;
3516
3517 rcu_read_lock();
3518 rb = rcu_dereference(event->rb);
3519 if (rb) {
3520 if (!atomic_inc_not_zero(&rb->refcount))
3521 rb = NULL;
3522 }
3523 rcu_read_unlock();
3524
3525 return rb;
3526}
3527
3528static void ring_buffer_put(struct ring_buffer *rb)
3529{
3530 struct perf_event *event, *n;
3531 unsigned long flags;
3532
3533 if (!atomic_dec_and_test(&rb->refcount))
3534 return;
3535
3536 spin_lock_irqsave(&rb->event_lock, flags);
3537 list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
3538 list_del_init(&event->rb_entry);
3539 wake_up_all(&event->waitq);
3540 }
3541 spin_unlock_irqrestore(&rb->event_lock, flags);
3542
3543 call_rcu(&rb->rcu_head, rb_free_rcu);
3544}
3545
/*
 * vm_ops->open handler: counts one more mapping of the event in
 * event->mmap_count.  perf_mmap_close() performs the matching decrement.
 */
static void perf_mmap_open(struct vm_area_struct *vma)
{
	struct perf_event *event = vma->vm_file->private_data;

	atomic_inc(&event->mmap_count);
}
3552
3553static void perf_mmap_close(struct vm_area_struct *vma)
3554{
3555 struct perf_event *event = vma->vm_file->private_data;
3556
3557 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
3558 unsigned long size = perf_data_size(event->rb);
3559 struct user_struct *user = event->mmap_user;
3560 struct ring_buffer *rb = event->rb;
3561
3562 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
3563 vma->vm_mm->pinned_vm -= event->mmap_locked;
3564 rcu_assign_pointer(event->rb, NULL);
3565 ring_buffer_detach(event, rb);
3566 mutex_unlock(&event->mmap_mutex);
3567
3568 ring_buffer_put(rb);
3569 free_uid(user);
3570 }
3571}
3572
/*
 * VMA operations installed by perf_mmap().  The same function serves as
 * both .fault and .page_mkwrite; perf_mmap_fault() tells the two apart
 * through FAULT_FLAG_MKWRITE.
 */
static const struct vm_operations_struct perf_mmap_vmops = {
	.open		= perf_mmap_open,
	.close		= perf_mmap_close,
	.fault		= perf_mmap_fault,
	.page_mkwrite	= perf_mmap_fault,
};
3579
/*
 * mmap() file operation for a perf event.
 *
 * Validates the requested mapping, then either reuses the ring buffer
 * the event already has (it must have the same number of data pages) or
 * allocates a new one after checking the locked-memory limits.
 *
 * Returns 0 on success, -EINVAL for an unacceptable mapping request,
 * -EPERM when the memory limits are exceeded, and -ENOMEM when the
 * buffer cannot be allocated.
 */
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct perf_event *event = file->private_data;
	unsigned long user_locked, user_lock_limit;
	struct user_struct *user = current_user();
	unsigned long locked, lock_limit;
	struct ring_buffer *rb;
	unsigned long vma_size;
	unsigned long nr_pages;
	long user_extra, extra;
	int ret = 0, flags = 0;

	/* Inherited events that are not bound to a CPU cannot be mapped. */
	if (event->cpu == -1 && event->attr.inherit)
		return -EINVAL;

	/* Only shared mappings are accepted. */
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	/* One page of the mapping is not counted as a data page. */
	vma_size = vma->vm_end - vma->vm_start;
	nr_pages = (vma_size / PAGE_SIZE) - 1;

	/* The data area must be empty or a power-of-two number of pages. */
	if (nr_pages != 0 && !is_power_of_2(nr_pages))
		return -EINVAL;

	/* Rejects sizes that are not exactly (1 + nr_pages) pages. */
	if (vma_size != PAGE_SIZE * (1 + nr_pages))
		return -EINVAL;

	/* The mapping has to start at file offset 0. */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	WARN_ON_ONCE(event->ctx->parent_ctx);
	mutex_lock(&event->mmap_mutex);
	if (event->rb) {
		/*
		 * A buffer already exists: the new mapping must match its
		 * size, in which case another buffer reference is taken.
		 * NOTE(review): perf_mmap_close() calls ring_buffer_put()
		 * only once, when mmap_count reaches zero - confirm that
		 * the reference taken here is balanced somewhere.
		 */
		if (event->rb->nr_pages == nr_pages)
			atomic_inc(&event->rb->refcount);
		else
			ret = -EINVAL;
		goto unlock;
	}

	user_extra = nr_pages + 1;
	/* presumably converts the sysctl from KiB to pages - TODO confirm */
	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);

	/* The per-user allowance is scaled by the number of online CPUs. */
	user_lock_limit *= num_online_cpus();

	user_locked = atomic_long_read(&user->locked_vm) + user_extra;

	/* Pages beyond the per-user allowance count as "extra". */
	extra = 0;
	if (user_locked > user_lock_limit)
		extra = user_locked - user_lock_limit;

	/* The extra pages are checked against RLIMIT_MEMLOCK, in pages. */
	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	locked = vma->vm_mm->pinned_vm + extra;

	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
		!capable(CAP_IPC_LOCK)) {
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(event->rb);

	if (vma->vm_flags & VM_WRITE)
		flags |= RING_BUFFER_WRITABLE;

	/* A watermark of 0 is passed unless attr.watermark is set. */
	rb = rb_alloc(nr_pages,
		event->attr.watermark ? event->attr.wakeup_watermark : 0,
		event->cpu, flags);

	if (!rb) {
		ret = -ENOMEM;
		goto unlock;
	}
	rcu_assign_pointer(event->rb, rb);

	/* Record the accounting so perf_mmap_close() can undo it. */
	atomic_long_add(user_extra, &user->locked_vm);
	event->mmap_locked = extra;
	event->mmap_user = get_current_user();
	vma->vm_mm->pinned_vm += event->mmap_locked;

	perf_event_update_userpage(event);

unlock:
	/* Only a successful mmap is counted in mmap_count. */
	if (!ret)
		atomic_inc(&event->mmap_count);
	mutex_unlock(&event->mmap_mutex);

	/* These are set on the failure paths that reach here as well. */
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &perf_mmap_vmops;

	return ret;
}
3685
3686static int perf_fasync(int fd, struct file *filp, int on)
3687{
3688 struct inode *inode = filp->f_path.dentry->d_inode;
3689 struct perf_event *event = filp->private_data;
3690 int retval;
3691
3692 mutex_lock(&inode->i_mutex);
3693 retval = fasync_helper(fd, filp, on, &event->fasync);
3694 mutex_unlock(&inode->i_mutex);
3695
3696 if (retval < 0)
3697 return retval;
3698
3699 return 0;
3700}
3701
/*
 * File operations of a perf event file.  perf_fget_light() identifies
 * perf event files by comparing f_op against this table.  The same
 * handler is used for native and compat ioctls.
 */
static const struct file_operations perf_fops = {
	.llseek			= no_llseek,
	.release		= perf_release,
	.read			= perf_read,
	.poll			= perf_poll,
	.unlocked_ioctl		= perf_ioctl,
	.compat_ioctl		= perf_ioctl,
	.mmap			= perf_mmap,
	.fasync			= perf_fasync,
};
3712
3713
3714
3715
3716
3717
3718
3719
3720void perf_event_wakeup(struct perf_event *event)
3721{
3722 ring_buffer_wakeup(event);
3723
3724 if (event->pending_kill) {
3725 kill_fasync(&event->fasync, SIGIO, event->pending_kill);
3726 event->pending_kill = 0;
3727 }
3728}
3729
3730static void perf_pending_event(struct irq_work *entry)
3731{
3732 struct perf_event *event = container_of(entry,
3733 struct perf_event, pending);
3734
3735 if (event->pending_disable) {
3736 event->pending_disable = 0;
3737 __perf_event_disable(event);
3738 }
3739
3740 if (event->pending_wakeup) {
3741 event->pending_wakeup = 0;
3742 perf_event_wakeup(event);
3743 }
3744}
3745
3746
3747
3748
3749
3750
/*
 * Currently registered guest info callbacks, or NULL.  Written by the
 * register/unregister functions below.
 */
struct perf_guest_info_callbacks *perf_guest_cbs;
3752
/*
 * Install @cbs as the guest info callbacks, replacing whatever was
 * registered before.  Always returns 0.
 */
int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
	perf_guest_cbs = cbs;
	return 0;
}
EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
3759
/*
 * Clear the guest info callbacks.  @cbs is not examined: the pointer is
 * reset to NULL whatever was registered.  Always returns 0.
 */
int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
	perf_guest_cbs = NULL;
	return 0;
}
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
3766
3767static void __perf_event_header__init_id(struct perf_event_header *header,
3768 struct perf_sample_data *data,
3769 struct perf_event *event)
3770{
3771 u64 sample_type = event->attr.sample_type;
3772
3773 data->type = sample_type;
3774 header->size += event->id_header_size;
3775
3776 if (sample_type & PERF_SAMPLE_TID) {
3777
3778 data->tid_entry.pid = perf_event_pid(event, current);
3779 data->tid_entry.tid = perf_event_tid(event, current);
3780 }
3781
3782 if (sample_type & PERF_SAMPLE_TIME)
3783 data->time = perf_clock();
3784
3785 if (sample_type & PERF_SAMPLE_ID)
3786 data->id = primary_event_id(event);
3787
3788 if (sample_type & PERF_SAMPLE_STREAM_ID)
3789 data->stream_id = event->id;
3790
3791 if (sample_type & PERF_SAMPLE_CPU) {
3792 data->cpu_entry.cpu = raw_smp_processor_id();
3793 data->cpu_entry.reserved = 0;
3794 }
3795}
3796
3797void perf_event_header__init_id(struct perf_event_header *header,
3798 struct perf_sample_data *data,
3799 struct perf_event *event)
3800{
3801 if (event->attr.sample_id_all)
3802 __perf_event_header__init_id(header, data, event);
3803}
3804
3805static void __perf_event__output_id_sample(struct perf_output_handle *handle,
3806 struct perf_sample_data *data)
3807{
3808 u64 sample_type = data->type;
3809
3810 if (sample_type & PERF_SAMPLE_TID)
3811 perf_output_put(handle, data->tid_entry);
3812
3813 if (sample_type & PERF_SAMPLE_TIME)
3814 perf_output_put(handle, data->time);
3815
3816 if (sample_type & PERF_SAMPLE_ID)
3817 perf_output_put(handle, data->id);
3818
3819 if (sample_type & PERF_SAMPLE_STREAM_ID)
3820 perf_output_put(handle, data->stream_id);
3821
3822 if (sample_type & PERF_SAMPLE_CPU)
3823 perf_output_put(handle, data->cpu_entry);
3824}
3825
3826void perf_event__output_id_sample(struct perf_event *event,
3827 struct perf_output_handle *handle,
3828 struct perf_sample_data *sample)
3829{
3830 if (event->attr.sample_id_all)
3831 __perf_event__output_id_sample(handle, sample);
3832}
3833
3834static void perf_output_read_one(struct perf_output_handle *handle,
3835 struct perf_event *event,
3836 u64 enabled, u64 running)
3837{
3838 u64 read_format = event->attr.read_format;
3839 u64 values[4];
3840 int n = 0;
3841
3842 values[n++] = perf_event_count(event);
3843 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3844 values[n++] = enabled +
3845 atomic64_read(&event->child_total_time_enabled);
3846 }
3847 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
3848 values[n++] = running +
3849 atomic64_read(&event->child_total_time_running);
3850 }
3851 if (read_format & PERF_FORMAT_ID)
3852 values[n++] = primary_event_id(event);
3853
3854 __output_copy(handle, values, n * sizeof(u64));
3855}
3856
3857
3858
3859
3860static void perf_output_read_group(struct perf_output_handle *handle,
3861 struct perf_event *event,
3862 u64 enabled, u64 running)
3863{
3864 struct perf_event *leader = event->group_leader, *sub;
3865 u64 read_format = event->attr.read_format;
3866 u64 values[5];
3867 int n = 0;
3868
3869 values[n++] = 1 + leader->nr_siblings;
3870
3871 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
3872 values[n++] = enabled;
3873
3874 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
3875 values[n++] = running;
3876
3877 if (leader != event)
3878 leader->pmu->read(leader);
3879
3880 values[n++] = perf_event_count(leader);
3881 if (read_format & PERF_FORMAT_ID)
3882 values[n++] = primary_event_id(leader);
3883
3884 __output_copy(handle, values, n * sizeof(u64));
3885
3886 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
3887 n = 0;
3888
3889 if (sub != event)
3890 sub->pmu->read(sub);
3891
3892 values[n++] = perf_event_count(sub);
3893 if (read_format & PERF_FORMAT_ID)
3894 values[n++] = primary_event_id(sub);
3895
3896 __output_copy(handle, values, n * sizeof(u64));
3897 }
3898}
3899
/* Both read_format bits that request time values. */
#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
				 PERF_FORMAT_TOTAL_TIME_RUNNING)
3902
3903static void perf_output_read(struct perf_output_handle *handle,
3904 struct perf_event *event)
3905{
3906 u64 enabled = 0, running = 0, now;
3907 u64 read_format = event->attr.read_format;
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918 if (read_format & PERF_FORMAT_TOTAL_TIMES)
3919 calc_timer_values(event, &now, &enabled, &running);
3920
3921 if (event->attr.read_format & PERF_FORMAT_GROUP)
3922 perf_output_read_group(handle, event, enabled, running);
3923 else
3924 perf_output_read_one(handle, event, enabled, running);
3925}
3926
3927void perf_output_sample(struct perf_output_handle *handle,
3928 struct perf_event_header *header,
3929 struct perf_sample_data *data,
3930 struct perf_event *event)
3931{
3932 u64 sample_type = data->type;
3933
3934 perf_output_put(handle, *header);
3935
3936 if (sample_type & PERF_SAMPLE_IP)
3937 perf_output_put(handle, data->ip);
3938
3939 if (sample_type & PERF_SAMPLE_TID)
3940 perf_output_put(handle, data->tid_entry);
3941
3942 if (sample_type & PERF_SAMPLE_TIME)
3943 perf_output_put(handle, data->time);
3944
3945 if (sample_type & PERF_SAMPLE_ADDR)
3946 perf_output_put(handle, data->addr);
3947
3948 if (sample_type & PERF_SAMPLE_ID)
3949 perf_output_put(handle, data->id);
3950
3951 if (sample_type & PERF_SAMPLE_STREAM_ID)
3952 perf_output_put(handle, data->stream_id);
3953
3954 if (sample_type & PERF_SAMPLE_CPU)
3955 perf_output_put(handle, data->cpu_entry);
3956
3957 if (sample_type & PERF_SAMPLE_PERIOD)
3958 perf_output_put(handle, data->period);
3959
3960 if (sample_type & PERF_SAMPLE_READ)
3961 perf_output_read(handle, event);
3962
3963 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
3964 if (data->callchain) {
3965 int size = 1;
3966
3967 if (data->callchain)
3968 size += data->callchain->nr;
3969
3970 size *= sizeof(u64);
3971
3972 __output_copy(handle, data->callchain, size);
3973 } else {
3974 u64 nr = 0;
3975 perf_output_put(handle, nr);
3976 }
3977 }
3978
3979 if (sample_type & PERF_SAMPLE_RAW) {
3980 if (data->raw) {
3981 perf_output_put(handle, data->raw->size);
3982 __output_copy(handle, data->raw->data,
3983 data->raw->size);
3984 } else {
3985 struct {
3986 u32 size;
3987 u32 data;
3988 } raw = {
3989 .size = sizeof(u32),
3990 .data = 0,
3991 };
3992 perf_output_put(handle, raw);
3993 }
3994 }
3995
3996 if (!event->attr.watermark) {
3997 int wakeup_events = event->attr.wakeup_events;
3998
3999 if (wakeup_events) {
4000 struct ring_buffer *rb = handle->rb;
4001 int events = local_inc_return(&rb->events);
4002
4003 if (events >= wakeup_events) {
4004 local_sub(wakeup_events, &rb->events);
4005 local_inc(&rb->wakeup);
4006 }
4007 }
4008 }
4009
4010 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
4011 if (data->br_stack) {
4012 size_t size;
4013
4014 size = data->br_stack->nr
4015 * sizeof(struct perf_branch_entry);
4016
4017 perf_output_put(handle, data->br_stack->nr);
4018 perf_output_copy(handle, data->br_stack->entries, size);
4019 } else {
4020
4021
4022
4023 u64 nr = 0;
4024 perf_output_put(handle, nr);
4025 }
4026 }
4027}
4028
4029void perf_prepare_sample(struct perf_event_header *header,
4030 struct perf_sample_data *data,
4031 struct perf_event *event,
4032 struct pt_regs *regs)
4033{
4034 u64 sample_type = event->attr.sample_type;
4035
4036 header->type = PERF_RECORD_SAMPLE;
4037 header->size = sizeof(*header) + event->header_size;
4038
4039 header->misc = 0;
4040 header->misc |= perf_misc_flags(regs);
4041
4042 __perf_event_header__init_id(header, data, event);
4043
4044 if (sample_type & PERF_SAMPLE_IP)
4045 data->ip = perf_instruction_pointer(regs);
4046
4047 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
4048 int size = 1;
4049
4050 data->callchain = perf_callchain(event, regs);
4051
4052 if (data->callchain)
4053 size += data->callchain->nr;
4054
4055 header->size += size * sizeof(u64);
4056 }
4057
4058 if (sample_type & PERF_SAMPLE_RAW) {
4059 int size = sizeof(u32);
4060
4061 if (data->raw)
4062 size += data->raw->size;
4063 else
4064 size += sizeof(u32);
4065
4066 WARN_ON_ONCE(size & (sizeof(u64)-1));
4067 header->size += size;
4068 }
4069
4070 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
4071 int size = sizeof(u64);
4072 if (data->br_stack) {
4073 size += data->br_stack->nr
4074 * sizeof(struct perf_branch_entry);
4075 }
4076 header->size += size;
4077 }
4078}
4079
4080static void perf_event_output(struct perf_event *event,
4081 struct perf_sample_data *data,
4082 struct pt_regs *regs)
4083{
4084 struct perf_output_handle handle;
4085 struct perf_event_header header;
4086
4087
4088 rcu_read_lock();
4089
4090 perf_prepare_sample(&header, data, event, regs);
4091
4092 if (perf_output_begin(&handle, event, header.size))
4093 goto exit;
4094
4095 perf_output_sample(&handle, &header, data, event);
4096
4097 perf_output_end(&handle);
4098
4099exit:
4100 rcu_read_unlock();
4101}
4102
4103
4104
4105
4106
/*
 * Fixed leading part of a PERF_RECORD_READ record, as written by
 * perf_event_read_event().
 */
struct perf_read_event {
	struct perf_event_header	header;

	u32				pid;	/* from perf_event_pid() */
	u32				tid;	/* from perf_event_tid() */
};
4113
4114static void
4115perf_event_read_event(struct perf_event *event,
4116 struct task_struct *task)
4117{
4118 struct perf_output_handle handle;
4119 struct perf_sample_data sample;
4120 struct perf_read_event read_event = {
4121 .header = {
4122 .type = PERF_RECORD_READ,
4123 .misc = 0,
4124 .size = sizeof(read_event) + event->read_size,
4125 },
4126 .pid = perf_event_pid(event, task),
4127 .tid = perf_event_tid(event, task),
4128 };
4129 int ret;
4130
4131 perf_event_header__init_id(&read_event.header, &sample, event);
4132 ret = perf_output_begin(&handle, event, read_event.header.size);
4133 if (ret)
4134 return;
4135
4136 perf_output_put(&handle, read_event);
4137 perf_output_read(&handle, event);
4138 perf_event__output_id_sample(event, &handle, &sample);
4139
4140 perf_output_end(&handle);
4141}
4142
4143
4144
4145
4146
4147
4148
/*
 * Working state for emitting a task (fork/exit) record.  event_id is the
 * part that is copied to the output buffer.
 */
struct perf_task_event {
	struct task_struct		*task;		/* task the record describes */
	/* when set, used instead of the current task's context */
	struct perf_event_context	*task_ctx;

	struct {
		struct perf_event_header	header;

		/* pid/tid come from ->task, ppid/ptid from the current task */
		u32				pid;
		u32				ppid;
		u32				tid;
		u32				ptid;
		u64				time;	/* perf_clock() at creation */
	} event_id;
};
4163
4164static void perf_event_task_output(struct perf_event *event,
4165 struct perf_task_event *task_event)
4166{
4167 struct perf_output_handle handle;
4168 struct perf_sample_data sample;
4169 struct task_struct *task = task_event->task;
4170 int ret, size = task_event->event_id.header.size;
4171
4172 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
4173
4174 ret = perf_output_begin(&handle, event,
4175 task_event->event_id.header.size);
4176 if (ret)
4177 goto out;
4178
4179 task_event->event_id.pid = perf_event_pid(event, task);
4180 task_event->event_id.ppid = perf_event_pid(event, current);
4181
4182 task_event->event_id.tid = perf_event_tid(event, task);
4183 task_event->event_id.ptid = perf_event_tid(event, current);
4184
4185 perf_output_put(&handle, task_event->event_id);
4186
4187 perf_event__output_id_sample(event, &handle, &sample);
4188
4189 perf_output_end(&handle);
4190out:
4191 task_event->event_id.header.size = size;
4192}
4193
4194static int perf_event_task_match(struct perf_event *event)
4195{
4196 if (event->state < PERF_EVENT_STATE_INACTIVE)
4197 return 0;
4198
4199 if (!event_filter_match(event))
4200 return 0;
4201
4202 if (event->attr.comm || event->attr.mmap ||
4203 event->attr.mmap_data || event->attr.task)
4204 return 1;
4205
4206 return 0;
4207}
4208
4209static void perf_event_task_ctx(struct perf_event_context *ctx,
4210 struct perf_task_event *task_event)
4211{
4212 struct perf_event *event;
4213
4214 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4215 if (perf_event_task_match(event))
4216 perf_event_task_output(event, task_event);
4217 }
4218}
4219
4220static void perf_event_task_event(struct perf_task_event *task_event)
4221{
4222 struct perf_cpu_context *cpuctx;
4223 struct perf_event_context *ctx;
4224 struct pmu *pmu;
4225 int ctxn;
4226
4227 rcu_read_lock();
4228 list_for_each_entry_rcu(pmu, &pmus, entry) {
4229 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4230 if (cpuctx->active_pmu != pmu)
4231 goto next;
4232 perf_event_task_ctx(&cpuctx->ctx, task_event);
4233
4234 ctx = task_event->task_ctx;
4235 if (!ctx) {
4236 ctxn = pmu->task_ctx_nr;
4237 if (ctxn < 0)
4238 goto next;
4239 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4240 }
4241 if (ctx)
4242 perf_event_task_ctx(ctx, task_event);
4243next:
4244 put_cpu_ptr(pmu->pmu_cpu_context);
4245 }
4246 rcu_read_unlock();
4247}
4248
4249static void perf_event_task(struct task_struct *task,
4250 struct perf_event_context *task_ctx,
4251 int new)
4252{
4253 struct perf_task_event task_event;
4254
4255 if (!atomic_read(&nr_comm_events) &&
4256 !atomic_read(&nr_mmap_events) &&
4257 !atomic_read(&nr_task_events))
4258 return;
4259
4260 task_event = (struct perf_task_event){
4261 .task = task,
4262 .task_ctx = task_ctx,
4263 .event_id = {
4264 .header = {
4265 .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
4266 .misc = 0,
4267 .size = sizeof(task_event.event_id),
4268 },
4269
4270
4271
4272
4273 .time = perf_clock(),
4274 },
4275 };
4276
4277 perf_event_task_event(&task_event);
4278}
4279
/*
 * Report @task through perf_event_task() as a new task (new = 1) with
 * no explicit task context.
 */
void perf_event_fork(struct task_struct *task)
{
	perf_event_task(task, NULL, 1);
}
4284
4285
4286
4287
4288
/*
 * Working state for emitting a PERF_RECORD_COMM record.  event_id is the
 * fixed part copied to the output buffer; the comm string follows it.
 */
struct perf_comm_event {
	struct task_struct	*task;		/* task whose comm is reported */
	char			*comm;		/* copy of the task's comm */
	int			comm_size;	/* string size, padded to a u64 multiple */

	struct {
		struct perf_event_header	header;

		u32				pid;
		u32				tid;
	} event_id;
};
4301
4302static void perf_event_comm_output(struct perf_event *event,
4303 struct perf_comm_event *comm_event)
4304{
4305 struct perf_output_handle handle;
4306 struct perf_sample_data sample;
4307 int size = comm_event->event_id.header.size;
4308 int ret;
4309
4310 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
4311 ret = perf_output_begin(&handle, event,
4312 comm_event->event_id.header.size);
4313
4314 if (ret)
4315 goto out;
4316
4317 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
4318 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
4319
4320 perf_output_put(&handle, comm_event->event_id);
4321 __output_copy(&handle, comm_event->comm,
4322 comm_event->comm_size);
4323
4324 perf_event__output_id_sample(event, &handle, &sample);
4325
4326 perf_output_end(&handle);
4327out:
4328 comm_event->event_id.header.size = size;
4329}
4330
4331static int perf_event_comm_match(struct perf_event *event)
4332{
4333 if (event->state < PERF_EVENT_STATE_INACTIVE)
4334 return 0;
4335
4336 if (!event_filter_match(event))
4337 return 0;
4338
4339 if (event->attr.comm)
4340 return 1;
4341
4342 return 0;
4343}
4344
4345static void perf_event_comm_ctx(struct perf_event_context *ctx,
4346 struct perf_comm_event *comm_event)
4347{
4348 struct perf_event *event;
4349
4350 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4351 if (perf_event_comm_match(event))
4352 perf_event_comm_output(event, comm_event);
4353 }
4354}
4355
4356static void perf_event_comm_event(struct perf_comm_event *comm_event)
4357{
4358 struct perf_cpu_context *cpuctx;
4359 struct perf_event_context *ctx;
4360 char comm[TASK_COMM_LEN];
4361 unsigned int size;
4362 struct pmu *pmu;
4363 int ctxn;
4364
4365 memset(comm, 0, sizeof(comm));
4366 strlcpy(comm, comm_event->task->comm, sizeof(comm));
4367 size = ALIGN(strlen(comm)+1, sizeof(u64));
4368
4369 comm_event->comm = comm;
4370 comm_event->comm_size = size;
4371
4372 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
4373 rcu_read_lock();
4374 list_for_each_entry_rcu(pmu, &pmus, entry) {
4375 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4376 if (cpuctx->active_pmu != pmu)
4377 goto next;
4378 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
4379
4380 ctxn = pmu->task_ctx_nr;
4381 if (ctxn < 0)
4382 goto next;
4383
4384 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4385 if (ctx)
4386 perf_event_comm_ctx(ctx, comm_event);
4387next:
4388 put_cpu_ptr(pmu->pmu_cpu_context);
4389 }
4390 rcu_read_unlock();
4391}
4392
4393void perf_event_comm(struct task_struct *task)
4394{
4395 struct perf_comm_event comm_event;
4396 struct perf_event_context *ctx;
4397 int ctxn;
4398
4399 for_each_task_context_nr(ctxn) {
4400 ctx = task->perf_event_ctxp[ctxn];
4401 if (!ctx)
4402 continue;
4403
4404 perf_event_enable_on_exec(ctx);
4405 }
4406
4407 if (!atomic_read(&nr_comm_events))
4408 return;
4409
4410 comm_event = (struct perf_comm_event){
4411 .task = task,
4412
4413
4414 .event_id = {
4415 .header = {
4416 .type = PERF_RECORD_COMM,
4417 .misc = 0,
4418
4419 },
4420
4421
4422 },
4423 };
4424
4425 perf_event_comm_event(&comm_event);
4426}
4427
4428
4429
4430
4431
/*
 * Working state for one PERF_RECORD_MMAP record.
 *
 * @vma:       mapping being reported
 * @file_name: name string emitted after @event_id
 * @file_size: number of bytes of @file_name to emit (set by
 *             perf_event_mmap_event() to the u64-aligned string length)
 * @event_id:  fixed-size part of the record, copied to the output as-is
 */
struct perf_mmap_event {
	struct vm_area_struct	*vma;

	const char		*file_name;
	int			file_size;

	struct {
		struct perf_event_header	header;

		u32				pid;
		u32				tid;
		u64				start;
		u64				len;
		u64				pgoff;
	} event_id;
};
4448
4449static void perf_event_mmap_output(struct perf_event *event,
4450 struct perf_mmap_event *mmap_event)
4451{
4452 struct perf_output_handle handle;
4453 struct perf_sample_data sample;
4454 int size = mmap_event->event_id.header.size;
4455 int ret;
4456
4457 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
4458 ret = perf_output_begin(&handle, event,
4459 mmap_event->event_id.header.size);
4460 if (ret)
4461 goto out;
4462
4463 mmap_event->event_id.pid = perf_event_pid(event, current);
4464 mmap_event->event_id.tid = perf_event_tid(event, current);
4465
4466 perf_output_put(&handle, mmap_event->event_id);
4467 __output_copy(&handle, mmap_event->file_name,
4468 mmap_event->file_size);
4469
4470 perf_event__output_id_sample(event, &handle, &sample);
4471
4472 perf_output_end(&handle);
4473out:
4474 mmap_event->event_id.header.size = size;
4475}
4476
4477static int perf_event_mmap_match(struct perf_event *event,
4478 struct perf_mmap_event *mmap_event,
4479 int executable)
4480{
4481 if (event->state < PERF_EVENT_STATE_INACTIVE)
4482 return 0;
4483
4484 if (!event_filter_match(event))
4485 return 0;
4486
4487 if ((!executable && event->attr.mmap_data) ||
4488 (executable && event->attr.mmap))
4489 return 1;
4490
4491 return 0;
4492}
4493
4494static void perf_event_mmap_ctx(struct perf_event_context *ctx,
4495 struct perf_mmap_event *mmap_event,
4496 int executable)
4497{
4498 struct perf_event *event;
4499
4500 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4501 if (perf_event_mmap_match(event, mmap_event, executable))
4502 perf_event_mmap_output(event, mmap_event);
4503 }
4504}
4505
4506static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
4507{
4508 struct perf_cpu_context *cpuctx;
4509 struct perf_event_context *ctx;
4510 struct vm_area_struct *vma = mmap_event->vma;
4511 struct file *file = vma->vm_file;
4512 unsigned int size;
4513 char tmp[16];
4514 char *buf = NULL;
4515 const char *name;
4516 struct pmu *pmu;
4517 int ctxn;
4518
4519 memset(tmp, 0, sizeof(tmp));
4520
4521 if (file) {
4522
4523
4524
4525
4526
4527 buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
4528 if (!buf) {
4529 name = strncpy(tmp, "//enomem", sizeof(tmp));
4530 goto got_name;
4531 }
4532 name = d_path(&file->f_path, buf, PATH_MAX);
4533 if (IS_ERR(name)) {
4534 name = strncpy(tmp, "//toolong", sizeof(tmp));
4535 goto got_name;
4536 }
4537 } else {
4538 if (arch_vma_name(mmap_event->vma)) {
4539 name = strncpy(tmp, arch_vma_name(mmap_event->vma),
4540 sizeof(tmp));
4541 goto got_name;
4542 }
4543
4544 if (!vma->vm_mm) {
4545 name = strncpy(tmp, "[vdso]", sizeof(tmp));
4546 goto got_name;
4547 } else if (vma->vm_start <= vma->vm_mm->start_brk &&
4548 vma->vm_end >= vma->vm_mm->brk) {
4549 name = strncpy(tmp, "[heap]", sizeof(tmp));
4550 goto got_name;
4551 } else if (vma->vm_start <= vma->vm_mm->start_stack &&
4552 vma->vm_end >= vma->vm_mm->start_stack) {
4553 name = strncpy(tmp, "[stack]", sizeof(tmp));
4554 goto got_name;
4555 }
4556
4557 name = strncpy(tmp, "//anon", sizeof(tmp));
4558 goto got_name;
4559 }
4560
4561got_name:
4562 size = ALIGN(strlen(name)+1, sizeof(u64));
4563
4564 mmap_event->file_name = name;
4565 mmap_event->file_size = size;
4566
4567 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
4568
4569 rcu_read_lock();
4570 list_for_each_entry_rcu(pmu, &pmus, entry) {
4571 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4572 if (cpuctx->active_pmu != pmu)
4573 goto next;
4574 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
4575 vma->vm_flags & VM_EXEC);
4576
4577 ctxn = pmu->task_ctx_nr;
4578 if (ctxn < 0)
4579 goto next;
4580
4581 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4582 if (ctx) {
4583 perf_event_mmap_ctx(ctx, mmap_event,
4584 vma->vm_flags & VM_EXEC);
4585 }
4586next:
4587 put_cpu_ptr(pmu->pmu_cpu_context);
4588 }
4589 rcu_read_unlock();
4590
4591 kfree(buf);
4592}
4593
/*
 * Report a mapping described by @vma as a PERF_RECORD_MMAP record.
 *
 * Does nothing when nr_mmap_events is zero. Otherwise the fixed part of
 * the record is filled from @vma (start, length, and page offset converted
 * to bytes) and handed to perf_event_mmap_event(), which resolves the name
 * and delivers it.
 */
void perf_event_mmap(struct vm_area_struct *vma)
{
	struct perf_mmap_event mmap_event;

	if (!atomic_read(&nr_mmap_events))
		return;

	/* Members not named here (file_name, file_size, header.size, pid, tid) start out zero. */
	mmap_event = (struct perf_mmap_event){
		.vma	= vma,


		.event_id  = {
			.header = {
				.type = PERF_RECORD_MMAP,
				.misc = PERF_RECORD_MISC_USER,

			},


			.start  = vma->vm_start,
			.len    = vma->vm_end - vma->vm_start,
			/* vm_pgoff is in pages; widen before shifting to a byte offset */
			.pgoff  = (u64)vma->vm_pgoff << PAGE_SHIFT,
		},
	};

	perf_event_mmap_event(&mmap_event);
}
4621
4622
4623
4624
4625
4626static void perf_log_throttle(struct perf_event *event, int enable)
4627{
4628 struct perf_output_handle handle;
4629 struct perf_sample_data sample;
4630 int ret;
4631
4632 struct {
4633 struct perf_event_header header;
4634 u64 time;
4635 u64 id;
4636 u64 stream_id;
4637 } throttle_event = {
4638 .header = {
4639 .type = PERF_RECORD_THROTTLE,
4640 .misc = 0,
4641 .size = sizeof(throttle_event),
4642 },
4643 .time = perf_clock(),
4644 .id = primary_event_id(event),
4645 .stream_id = event->id,
4646 };
4647
4648 if (enable)
4649 throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
4650
4651 perf_event_header__init_id(&throttle_event.header, &sample, event);
4652
4653 ret = perf_output_begin(&handle, event,
4654 throttle_event.header.size);
4655 if (ret)
4656 return;
4657
4658 perf_output_put(&handle, throttle_event);
4659 perf_event__output_id_sample(event, &handle, &sample);
4660 perf_output_end(&handle);
4661}
4662
4663
4664
4665
4666
/*
 * Common overflow handling for a sampling event.
 *
 * @event:    event that overflowed
 * @throttle: non-zero to apply the per-tick interrupt limit
 * @data:     sample data handed to the output/handler
 * @regs:     register state handed to the output/handler
 *
 * Returns 0 for non-sampling events. Otherwise returns 1 when the event was
 * throttled by this call or its event_limit just reached zero, else 0.
 */
static int __perf_event_overflow(struct perf_event *event,
				   int throttle, struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	/* snapshot: a zero limit means "no limit armed" for the check below */
	int events = atomic_read(&event->event_limit);
	struct hw_perf_event *hwc = &event->hw;
	u64 seq;
	int ret = 0;

	/* Only sampling events are processed here. */
	if (unlikely(!is_sampling_event(event)))
		return 0;

	/*
	 * Interrupt accounting is keyed on this CPU's perf_throttled_seq: a
	 * changed sequence restarts the count at 1, otherwise the count is
	 * bumped and compared against max_samples_per_tick.
	 */
	seq = __this_cpu_read(perf_throttled_seq);
	if (seq != hwc->interrupts_seq) {
		hwc->interrupts_seq = seq;
		hwc->interrupts = 1;
	} else {
		hwc->interrupts++;
		if (unlikely(throttle
			     && hwc->interrupts >= max_samples_per_tick)) {
			/* mark throttled, count it per CPU and log a THROTTLE record */
			__this_cpu_inc(perf_throttled_count);
			hwc->interrupts = MAX_INTERRUPTS;
			perf_log_throttle(event, 0);
			ret = 1;
		}
	}

	if (event->attr.freq) {
		u64 now = perf_clock();
		s64 delta = now - hwc->freq_time_stamp;

		hwc->freq_time_stamp = now;

		/* only re-tune the period for intervals in (0, 2 ticks) */
		if (delta > 0 && delta < 2*TICK_NSEC)
			perf_adjust_period(event, delta, hwc->last_period, true);
	}

	/*
	 * Default to POLL_IN; if an armed event_limit drops to zero now,
	 * switch to POLL_HUP and request a deferred disable via irq_work.
	 */
	event->pending_kill = POLL_IN;
	if (events && atomic_dec_and_test(&event->event_limit)) {
		ret = 1;
		event->pending_kill = POLL_HUP;
		event->pending_disable = 1;
		irq_work_queue(&event->pending);
	}

	/* A custom overflow handler replaces the default sample output. */
	if (event->overflow_handler)
		event->overflow_handler(event, data, regs);
	else
		perf_event_output(event, data, regs);

	/* fasync users get a deferred wakeup */
	if (event->fasync && event->pending_kill) {
		event->pending_wakeup = 1;
		irq_work_queue(&event->pending);
	}

	return ret;
}
4733
/*
 * Overflow entry point with throttling enabled: forwards to
 * __perf_event_overflow() with throttle = 1 and returns its result.
 */
int perf_event_overflow(struct perf_event *event,
			  struct perf_sample_data *data,
			  struct pt_regs *regs)
{
	return __perf_event_overflow(event, 1, data, regs);
}
4740
4741
4742
4743
4744
/*
 * Per-CPU software event hash table state.
 *
 * @swevent_hlist:  hash list, accessed through RCU dereference helpers
 * @hlist_mutex:    held while the hlist pointer and refcount are changed
 * @hlist_refcount: number of users taken via swevent_hlist_get_cpu()
 * @recursion:      per-context recursion state handed to
 *                  get_recursion_context()/put_recursion_context()
 */
struct swevent_htable {
	struct swevent_hlist		*swevent_hlist;
	struct mutex			hlist_mutex;
	int				hlist_refcount;


	int				recursion[PERF_NR_CONTEXTS];
};

/* One table per CPU. */
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
4755
4756
4757
4758
4759
4760
4761
4762
4763static u64 perf_swevent_set_period(struct perf_event *event)
4764{
4765 struct hw_perf_event *hwc = &event->hw;
4766 u64 period = hwc->last_period;
4767 u64 nr, offset;
4768 s64 old, val;
4769
4770 hwc->last_period = hwc->sample_period;
4771
4772again:
4773 old = val = local64_read(&hwc->period_left);
4774 if (val < 0)
4775 return 0;
4776
4777 nr = div64_u64(period + val, period);
4778 offset = nr * period;
4779 val -= offset;
4780 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
4781 goto again;
4782
4783 return nr;
4784}
4785
4786static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
4787 struct perf_sample_data *data,
4788 struct pt_regs *regs)
4789{
4790 struct hw_perf_event *hwc = &event->hw;
4791 int throttle = 0;
4792
4793 if (!overflow)
4794 overflow = perf_swevent_set_period(event);
4795
4796 if (hwc->interrupts == MAX_INTERRUPTS)
4797 return;
4798
4799 for (; overflow; overflow--) {
4800 if (__perf_event_overflow(event, throttle,
4801 data, regs)) {
4802
4803
4804
4805
4806 break;
4807 }
4808 throttle = 1;
4809 }
4810}
4811
4812static void perf_swevent_event(struct perf_event *event, u64 nr,
4813 struct perf_sample_data *data,
4814 struct pt_regs *regs)
4815{
4816 struct hw_perf_event *hwc = &event->hw;
4817
4818 local64_add(nr, &event->count);
4819
4820 if (!regs)
4821 return;
4822
4823 if (!is_sampling_event(event))
4824 return;
4825
4826 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
4827 data->period = nr;
4828 return perf_swevent_overflow(event, 1, data, regs);
4829 } else
4830 data->period = event->hw.last_period;
4831
4832 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
4833 return perf_swevent_overflow(event, 1, data, regs);
4834
4835 if (local64_add_negative(nr, &hwc->period_left))
4836 return;
4837
4838 perf_swevent_overflow(event, 0, data, regs);
4839}
4840
4841static int perf_exclude_event(struct perf_event *event,
4842 struct pt_regs *regs)
4843{
4844 if (event->hw.state & PERF_HES_STOPPED)
4845 return 1;
4846
4847 if (regs) {
4848 if (event->attr.exclude_user && user_mode(regs))
4849 return 1;
4850
4851 if (event->attr.exclude_kernel && !user_mode(regs))
4852 return 1;
4853 }
4854
4855 return 0;
4856}
4857
4858static int perf_swevent_match(struct perf_event *event,
4859 enum perf_type_id type,
4860 u32 event_id,
4861 struct perf_sample_data *data,
4862 struct pt_regs *regs)
4863{
4864 if (event->attr.type != type)
4865 return 0;
4866
4867 if (event->attr.config != event_id)
4868 return 0;
4869
4870 if (perf_exclude_event(event, regs))
4871 return 0;
4872
4873 return 1;
4874}
4875
4876static inline u64 swevent_hash(u64 type, u32 event_id)
4877{
4878 u64 val = event_id | (type << 32);
4879
4880 return hash_64(val, SWEVENT_HLIST_BITS);
4881}
4882
4883static inline struct hlist_head *
4884__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
4885{
4886 u64 hash = swevent_hash(type, event_id);
4887
4888 return &hlist->heads[hash];
4889}
4890
4891
4892static inline struct hlist_head *
4893find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
4894{
4895 struct swevent_hlist *hlist;
4896
4897 hlist = rcu_dereference(swhash->swevent_hlist);
4898 if (!hlist)
4899 return NULL;
4900
4901 return __find_swevent_head(hlist, type, event_id);
4902}
4903
4904
4905static inline struct hlist_head *
4906find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
4907{
4908 struct swevent_hlist *hlist;
4909 u32 event_id = event->attr.config;
4910 u64 type = event->attr.type;
4911
4912
4913
4914
4915
4916
4917 hlist = rcu_dereference_protected(swhash->swevent_hlist,
4918 lockdep_is_held(&event->ctx->lock));
4919 if (!hlist)
4920 return NULL;
4921
4922 return __find_swevent_head(hlist, type, event_id);
4923}
4924
4925static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4926 u64 nr,
4927 struct perf_sample_data *data,
4928 struct pt_regs *regs)
4929{
4930 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4931 struct perf_event *event;
4932 struct hlist_node *node;
4933 struct hlist_head *head;
4934
4935 rcu_read_lock();
4936 head = find_swevent_head_rcu(swhash, type, event_id);
4937 if (!head)
4938 goto end;
4939
4940 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4941 if (perf_swevent_match(event, type, event_id, data, regs))
4942 perf_swevent_event(event, nr, data, regs);
4943 }
4944end:
4945 rcu_read_unlock();
4946}
4947
4948int perf_swevent_get_recursion_context(void)
4949{
4950 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4951
4952 return get_recursion_context(swhash->recursion);
4953}
4954EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4955
4956inline void perf_swevent_put_recursion_context(int rctx)
4957{
4958 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4959
4960 put_recursion_context(swhash->recursion, rctx);
4961}
4962
4963void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
4964{
4965 struct perf_sample_data data;
4966 int rctx;
4967
4968 preempt_disable_notrace();
4969 rctx = perf_swevent_get_recursion_context();
4970 if (rctx < 0)
4971 return;
4972
4973 perf_sample_data_init(&data, addr, 0);
4974
4975 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
4976
4977 perf_swevent_put_recursion_context(rctx);
4978 preempt_enable_notrace();
4979}
4980
/*
 * pmu ->read() callback for software events: intentionally empty.
 * perf_swevent_event() adds to event->count directly when an occurrence is
 * delivered, so this callback has nothing to do.
 */
static void perf_swevent_read(struct perf_event *event)
{
}
4984
4985static int perf_swevent_add(struct perf_event *event, int flags)
4986{
4987 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4988 struct hw_perf_event *hwc = &event->hw;
4989 struct hlist_head *head;
4990
4991 if (is_sampling_event(event)) {
4992 hwc->last_period = hwc->sample_period;
4993 perf_swevent_set_period(event);
4994 }
4995
4996 hwc->state = !(flags & PERF_EF_START);
4997
4998 head = find_swevent_head(swhash, event);
4999 if (WARN_ON_ONCE(!head))
5000 return -EINVAL;
5001
5002 hlist_add_head_rcu(&event->hlist_entry, head);
5003
5004 return 0;
5005}
5006
/*
 * pmu ->del() callback: unlink @event from its swevent hash bucket using
 * the RCU-aware list removal. @flags is unused.
 */
static void perf_swevent_del(struct perf_event *event, int flags)
{
	hlist_del_rcu(&event->hlist_entry);
}
5011
/*
 * pmu ->start() callback: clear hw.state so the PERF_HES_STOPPED checks in
 * the match helpers let occurrences through. @flags is unused.
 */
static void perf_swevent_start(struct perf_event *event, int flags)
{
	event->hw.state = 0;
}
5016
/*
 * pmu ->stop() callback: mark the event PERF_HES_STOPPED so the match
 * helpers reject it. @flags is unused.
 */
static void perf_swevent_stop(struct perf_event *event, int flags)
{
	event->hw.state = PERF_HES_STOPPED;
}
5021
5022
/*
 * Update-side read of the hlist pointer; asserts (via lockdep) that
 * swhash->hlist_mutex is held by the caller.
 */
static inline struct swevent_hlist *
swevent_hlist_deref(struct swevent_htable *swhash)
{
	return rcu_dereference_protected(swhash->swevent_hlist,
					 lockdep_is_held(&swhash->hlist_mutex));
}
5029
5030static void swevent_hlist_release(struct swevent_htable *swhash)
5031{
5032 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
5033
5034 if (!hlist)
5035 return;
5036
5037 rcu_assign_pointer(swhash->swevent_hlist, NULL);
5038 kfree_rcu(hlist, rcu_head);
5039}
5040
5041static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
5042{
5043 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5044
5045 mutex_lock(&swhash->hlist_mutex);
5046
5047 if (!--swhash->hlist_refcount)
5048 swevent_hlist_release(swhash);
5049
5050 mutex_unlock(&swhash->hlist_mutex);
5051}
5052
5053static void swevent_hlist_put(struct perf_event *event)
5054{
5055 int cpu;
5056
5057 if (event->cpu != -1) {
5058 swevent_hlist_put_cpu(event, event->cpu);
5059 return;
5060 }
5061
5062 for_each_possible_cpu(cpu)
5063 swevent_hlist_put_cpu(event, cpu);
5064}
5065
5066static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
5067{
5068 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5069 int err = 0;
5070
5071 mutex_lock(&swhash->hlist_mutex);
5072
5073 if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
5074 struct swevent_hlist *hlist;
5075
5076 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
5077 if (!hlist) {
5078 err = -ENOMEM;
5079 goto exit;
5080 }
5081 rcu_assign_pointer(swhash->swevent_hlist, hlist);
5082 }
5083 swhash->hlist_refcount++;
5084exit:
5085 mutex_unlock(&swhash->hlist_mutex);
5086
5087 return err;
5088}
5089
5090static int swevent_hlist_get(struct perf_event *event)
5091{
5092 int err;
5093 int cpu, failed_cpu;
5094
5095 if (event->cpu != -1)
5096 return swevent_hlist_get_cpu(event, event->cpu);
5097
5098 get_online_cpus();
5099 for_each_possible_cpu(cpu) {
5100 err = swevent_hlist_get_cpu(event, cpu);
5101 if (err) {
5102 failed_cpu = cpu;
5103 goto fail;
5104 }
5105 }
5106 put_online_cpus();
5107
5108 return 0;
5109fail:
5110 for_each_possible_cpu(cpu) {
5111 if (cpu == failed_cpu)
5112 break;
5113 swevent_hlist_put_cpu(event, cpu);
5114 }
5115
5116 put_online_cpus();
5117 return err;
5118}
5119
/*
 * One static key per software event id, indexed by attr.config; bumped in
 * perf_swevent_init() and dropped in sw_perf_event_destroy().
 */
struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
5121
5122static void sw_perf_event_destroy(struct perf_event *event)
5123{
5124 u64 event_id = event->attr.config;
5125
5126 WARN_ON(event->parent);
5127
5128 static_key_slow_dec(&perf_swevent_enabled[event_id]);
5129 swevent_hlist_put(event);
5130}
5131
5132static int perf_swevent_init(struct perf_event *event)
5133{
5134 int event_id = event->attr.config;
5135
5136 if (event->attr.type != PERF_TYPE_SOFTWARE)
5137 return -ENOENT;
5138
5139
5140
5141
5142 if (has_branch_stack(event))
5143 return -EOPNOTSUPP;
5144
5145 switch (event_id) {
5146 case PERF_COUNT_SW_CPU_CLOCK:
5147 case PERF_COUNT_SW_TASK_CLOCK:
5148 return -ENOENT;
5149
5150 default:
5151 break;
5152 }
5153
5154 if (event_id >= PERF_COUNT_SW_MAX)
5155 return -ENOENT;
5156
5157 if (!event->parent) {
5158 int err;
5159
5160 err = swevent_hlist_get(event);
5161 if (err)
5162 return err;
5163
5164 static_key_slow_inc(&perf_swevent_enabled[event_id]);
5165 event->destroy = sw_perf_event_destroy;
5166 }
5167
5168 return 0;
5169}
5170
/* pmu ->event_idx() callback shared by the software pmus: always 0. */
static int perf_swevent_event_idx(struct perf_event *event)
{
	return 0;
}
5175
/*
 * pmu for generic software events; its task contexts live in the
 * perf_sw_context slot.
 */
static struct pmu perf_swevent = {
	.task_ctx_nr	= perf_sw_context,

	.event_init	= perf_swevent_init,
	.add		= perf_swevent_add,
	.del		= perf_swevent_del,
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,

	.event_idx	= perf_swevent_event_idx,
};
5188
5189#ifdef CONFIG_EVENT_TRACING
5190
5191static int perf_tp_filter_match(struct perf_event *event,
5192 struct perf_sample_data *data)
5193{
5194 void *record = data->raw->data;
5195
5196 if (likely(!event->filter) || filter_match_preds(event->filter, record))
5197 return 1;
5198 return 0;
5199}
5200
5201static int perf_tp_event_match(struct perf_event *event,
5202 struct perf_sample_data *data,
5203 struct pt_regs *regs)
5204{
5205 if (event->hw.state & PERF_HES_STOPPED)
5206 return 0;
5207
5208
5209
5210 if (event->attr.exclude_kernel)
5211 return 0;
5212
5213 if (!perf_tp_filter_match(event, data))
5214 return 0;
5215
5216 return 1;
5217}
5218
5219void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
5220 struct pt_regs *regs, struct hlist_head *head, int rctx,
5221 struct task_struct *task)
5222{
5223 struct perf_sample_data data;
5224 struct perf_event *event;
5225 struct hlist_node *node;
5226
5227 struct perf_raw_record raw = {
5228 .size = entry_size,
5229 .data = record,
5230 };
5231
5232 perf_sample_data_init(&data, addr, 0);
5233 data.raw = &raw;
5234
5235 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
5236 if (perf_tp_event_match(event, &data, regs))
5237 perf_swevent_event(event, count, &data, regs);
5238 }
5239
5240
5241
5242
5243
5244 if (task && task != current) {
5245 struct perf_event_context *ctx;
5246 struct trace_entry *entry = record;
5247
5248 rcu_read_lock();
5249 ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
5250 if (!ctx)
5251 goto unlock;
5252
5253 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
5254 if (event->attr.type != PERF_TYPE_TRACEPOINT)
5255 continue;
5256 if (event->attr.config != entry->type)
5257 continue;
5258 if (perf_tp_event_match(event, &data, regs))
5259 perf_swevent_event(event, count, &data, regs);
5260 }
5261unlock:
5262 rcu_read_unlock();
5263 }
5264
5265 perf_swevent_put_recursion_context(rctx);
5266}
5267EXPORT_SYMBOL_GPL(perf_tp_event);
5268
/* ->destroy() hook for tracepoint events: hands off to perf_trace_destroy(). */
static void tp_perf_event_destroy(struct perf_event *event)
{
	perf_trace_destroy(event);
}
5273
5274static int perf_tp_event_init(struct perf_event *event)
5275{
5276 int err;
5277
5278 if (event->attr.type != PERF_TYPE_TRACEPOINT)
5279 return -ENOENT;
5280
5281
5282
5283
5284 if (has_branch_stack(event))
5285 return -EOPNOTSUPP;
5286
5287 err = perf_trace_init(event);
5288 if (err)
5289 return err;
5290
5291 event->destroy = tp_perf_event_destroy;
5292
5293 return 0;
5294}
5295
/*
 * pmu for tracepoint events. It shares the perf_sw_context slot and the
 * start/stop/read/event_idx callbacks with the generic software pmu;
 * add/del are provided by the perf_trace_* functions.
 */
static struct pmu perf_tracepoint = {
	.task_ctx_nr	= perf_sw_context,

	.event_init	= perf_tp_event_init,
	.add		= perf_trace_add,
	.del		= perf_trace_del,
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,

	.event_idx	= perf_swevent_event_idx,
};
5308
/* Register the tracepoint pmu under the name "tracepoint". */
static inline void perf_tp_register(void)
{
	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
}
5313
5314static int perf_event_set_filter(struct perf_event *event, void __user *arg)
5315{
5316 char *filter_str;
5317 int ret;
5318
5319 if (event->attr.type != PERF_TYPE_TRACEPOINT)
5320 return -EINVAL;
5321
5322 filter_str = strndup_user(arg, PAGE_SIZE);
5323 if (IS_ERR(filter_str))
5324 return PTR_ERR(filter_str);
5325
5326 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
5327
5328 kfree(filter_str);
5329 return ret;
5330}
5331
/* Release @event's filter via ftrace_profile_free_filter(). */
static void perf_event_free_filter(struct perf_event *event)
{
	ftrace_profile_free_filter(event);
}
5336
5337#else
5338
/* Without CONFIG_EVENT_TRACING there is no tracepoint pmu to register. */
static inline void perf_tp_register(void)
{
}
5342
/* Without CONFIG_EVENT_TRACING filters are unavailable: always -ENOENT. */
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
	return -ENOENT;
}
5347
/* Without CONFIG_EVENT_TRACING no filter can exist, so nothing to free. */
static void perf_event_free_filter(struct perf_event *event)
{
}
5351
5352#endif
5353
5354#ifdef CONFIG_HAVE_HW_BREAKPOINT
5355void perf_bp_event(struct perf_event *bp, void *data)
5356{
5357 struct perf_sample_data sample;
5358 struct pt_regs *regs = data;
5359
5360 perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
5361
5362 if (!bp->hw.state && !perf_exclude_event(bp, regs))
5363 perf_swevent_event(bp, 1, &sample, regs);
5364}
5365#endif
5366
5367
5368
5369
5370
5371static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
5372{
5373 enum hrtimer_restart ret = HRTIMER_RESTART;
5374 struct perf_sample_data data;
5375 struct pt_regs *regs;
5376 struct perf_event *event;
5377 u64 period;
5378
5379 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
5380
5381 if (event->state != PERF_EVENT_STATE_ACTIVE)
5382 return HRTIMER_NORESTART;
5383
5384 event->pmu->read(event);
5385
5386 perf_sample_data_init(&data, 0, event->hw.last_period);
5387 regs = get_irq_regs();
5388
5389 if (regs && !perf_exclude_event(event, regs)) {
5390 if (!(event->attr.exclude_idle && is_idle_task(current)))
5391 if (__perf_event_overflow(event, 1, &data, regs))
5392 ret = HRTIMER_NORESTART;
5393 }
5394
5395 period = max_t(u64, 10000, event->hw.sample_period);
5396 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
5397
5398 return ret;
5399}
5400
5401static void perf_swevent_start_hrtimer(struct perf_event *event)
5402{
5403 struct hw_perf_event *hwc = &event->hw;
5404 s64 period;
5405
5406 if (!is_sampling_event(event))
5407 return;
5408
5409 period = local64_read(&hwc->period_left);
5410 if (period) {
5411 if (period < 0)
5412 period = 10000;
5413
5414 local64_set(&hwc->period_left, 0);
5415 } else {
5416 period = max_t(u64, 10000, hwc->sample_period);
5417 }
5418 __hrtimer_start_range_ns(&hwc->hrtimer,
5419 ns_to_ktime(period), 0,
5420 HRTIMER_MODE_REL_PINNED, 0);
5421}
5422
5423static void perf_swevent_cancel_hrtimer(struct perf_event *event)
5424{
5425 struct hw_perf_event *hwc = &event->hw;
5426
5427 if (is_sampling_event(event)) {
5428 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
5429 local64_set(&hwc->period_left, ktime_to_ns(remaining));
5430
5431 hrtimer_cancel(&hwc->hrtimer);
5432 }
5433}
5434
5435static void perf_swevent_init_hrtimer(struct perf_event *event)
5436{
5437 struct hw_perf_event *hwc = &event->hw;
5438
5439 if (!is_sampling_event(event))
5440 return;
5441
5442 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5443 hwc->hrtimer.function = perf_swevent_hrtimer;
5444
5445
5446
5447
5448
5449 if (event->attr.freq) {
5450 long freq = event->attr.sample_freq;
5451
5452 event->attr.sample_period = NSEC_PER_SEC / freq;
5453 hwc->sample_period = event->attr.sample_period;
5454 local64_set(&hwc->period_left, hwc->sample_period);
5455 event->attr.freq = 0;
5456 }
5457}
5458
5459
5460
5461
5462
5463static void cpu_clock_event_update(struct perf_event *event)
5464{
5465 s64 prev;
5466 u64 now;
5467
5468 now = local_clock();
5469 prev = local64_xchg(&event->hw.prev_count, now);
5470 local64_add(now - prev, &event->count);
5471}
5472
/*
 * pmu ->start(): take the current local_clock() as the baseline and arm
 * the sampling hrtimer. @flags is unused.
 */
static void cpu_clock_event_start(struct perf_event *event, int flags)
{
	local64_set(&event->hw.prev_count, local_clock());
	perf_swevent_start_hrtimer(event);
}
5478
/*
 * pmu ->stop(): cancel the sampling hrtimer, then account the time that
 * has elapsed since the last update. @flags is unused.
 */
static void cpu_clock_event_stop(struct perf_event *event, int flags)
{
	perf_swevent_cancel_hrtimer(event);
	cpu_clock_event_update(event);
}
5484
/*
 * pmu ->add(): start the event right away if PERF_EF_START is set in
 * @flags. Always returns 0.
 */
static int cpu_clock_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		cpu_clock_event_start(event, flags);

	return 0;
}
5492
/* pmu ->del(): stop the event, passing @flags straight through. */
static void cpu_clock_event_del(struct perf_event *event, int flags)
{
	cpu_clock_event_stop(event, flags);
}
5497
/* pmu ->read(): bring event->count up to date with local_clock(). */
static void cpu_clock_event_read(struct perf_event *event)
{
	cpu_clock_event_update(event);
}
5502
5503static int cpu_clock_event_init(struct perf_event *event)
5504{
5505 if (event->attr.type != PERF_TYPE_SOFTWARE)
5506 return -ENOENT;
5507
5508 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
5509 return -ENOENT;
5510
5511
5512
5513
5514 if (has_branch_stack(event))
5515 return -EOPNOTSUPP;
5516
5517 perf_swevent_init_hrtimer(event);
5518
5519 return 0;
5520}
5521
/* pmu for PERF_COUNT_SW_CPU_CLOCK; uses the perf_sw_context slot. */
static struct pmu perf_cpu_clock = {
	.task_ctx_nr	= perf_sw_context,

	.event_init	= cpu_clock_event_init,
	.add		= cpu_clock_event_add,
	.del		= cpu_clock_event_del,
	.start		= cpu_clock_event_start,
	.stop		= cpu_clock_event_stop,
	.read		= cpu_clock_event_read,

	.event_idx	= perf_swevent_event_idx,
};
5534
5535
5536
5537
5538
5539static void task_clock_event_update(struct perf_event *event, u64 now)
5540{
5541 u64 prev;
5542 s64 delta;
5543
5544 prev = local64_xchg(&event->hw.prev_count, now);
5545 delta = now - prev;
5546 local64_add(delta, &event->count);
5547}
5548
/*
 * pmu ->start(): use the context's current time as the baseline and arm
 * the sampling hrtimer. @flags is unused.
 */
static void task_clock_event_start(struct perf_event *event, int flags)
{
	local64_set(&event->hw.prev_count, event->ctx->time);
	perf_swevent_start_hrtimer(event);
}
5554
/*
 * pmu ->stop(): cancel the sampling hrtimer, then account up to the
 * context's current time. @flags is unused.
 */
static void task_clock_event_stop(struct perf_event *event, int flags)
{
	perf_swevent_cancel_hrtimer(event);
	task_clock_event_update(event, event->ctx->time);
}
5560
/*
 * pmu ->add(): start the event right away if PERF_EF_START is set in
 * @flags. Always returns 0.
 */
static int task_clock_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		task_clock_event_start(event, flags);

	return 0;
}
5568
/*
 * pmu ->del(): stop the event. PERF_EF_UPDATE is passed instead of the
 * caller's @flags; task_clock_event_stop() does not inspect its flags
 * argument.
 */
static void task_clock_event_del(struct perf_event *event, int flags)
{
	task_clock_event_stop(event, PERF_EF_UPDATE);
}
5573
5574static void task_clock_event_read(struct perf_event *event)
5575{
5576 u64 now = perf_clock();
5577 u64 delta = now - event->ctx->timestamp;
5578 u64 time = event->ctx->time + delta;
5579
5580 task_clock_event_update(event, time);
5581}
5582
5583static int task_clock_event_init(struct perf_event *event)
5584{
5585 if (event->attr.type != PERF_TYPE_SOFTWARE)
5586 return -ENOENT;
5587
5588 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5589 return -ENOENT;
5590
5591
5592
5593
5594 if (has_branch_stack(event))
5595 return -EOPNOTSUPP;
5596
5597 perf_swevent_init_hrtimer(event);
5598
5599 return 0;
5600}
5601
/* pmu for PERF_COUNT_SW_TASK_CLOCK; uses the perf_sw_context slot. */
static struct pmu perf_task_clock = {
	.task_ctx_nr	= perf_sw_context,

	.event_init	= task_clock_event_init,
	.add		= task_clock_event_add,
	.del		= task_clock_event_del,
	.start		= task_clock_event_start,
	.stop		= task_clock_event_stop,
	.read		= task_clock_event_read,

	.event_idx	= perf_swevent_event_idx,
};
5614
/* No-op pmu callback for hooks that return nothing. */
static void perf_pmu_nop_void(struct pmu *pmu)
{
}
5618
/* No-op pmu callback for hooks that return a status: always 0. */
static int perf_pmu_nop_int(struct pmu *pmu)
{
	return 0;
}
5623
/*
 * Transaction start helper: disables the pmu. The matching
 * perf_pmu_enable() is in perf_pmu_commit_txn() / perf_pmu_cancel_txn().
 */
static void perf_pmu_start_txn(struct pmu *pmu)
{
	perf_pmu_disable(pmu);
}
5628
/* Transaction commit helper: re-enables the pmu and reports success (0). */
static int perf_pmu_commit_txn(struct pmu *pmu)
{
	perf_pmu_enable(pmu);
	return 0;
}
5634
/* Transaction cancel helper: re-enables the pmu. */
static void perf_pmu_cancel_txn(struct pmu *pmu)
{
	perf_pmu_enable(pmu);
}
5639
/* Event index helper: the event's hw.idx plus one. */
static int perf_event_idx_default(struct perf_event *event)
{
	return event->hw.idx + 1;
}
5644
5645
5646
5647
5648
5649static void *find_pmu_context(int ctxn)
5650{
5651 struct pmu *pmu;
5652
5653 if (ctxn < 0)
5654 return NULL;
5655
5656 list_for_each_entry(pmu, &pmus, entry) {
5657 if (pmu->task_ctx_nr == ctxn)
5658 return pmu->pmu_cpu_context;
5659 }
5660
5661 return NULL;
5662}
5663
5664static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
5665{
5666 int cpu;
5667
5668 for_each_possible_cpu(cpu) {
5669 struct perf_cpu_context *cpuctx;
5670
5671 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5672
5673 if (cpuctx->active_pmu == old_pmu)
5674 cpuctx->active_pmu = pmu;
5675 }