// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events core code.
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/tick.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/reboot.h>
#include <linux/vmstat.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
#include <linux/trace_events.h>
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/namei.h>
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include <linux/min_heap.h>
#include <linux/highmem.h>
#include <linux/pgtable.h>
#include <linux/buildid.h>

#include "internal.h"

#include <asm/irq_regs.h>

typedef int (*remote_function_f)(void *);

struct remote_function_call {
	struct task_struct *p;
	remote_function_f func;
	void *info;
	int ret;
};

static void remote_function(void *data)
{
	struct remote_function_call *tfc = data;
	struct task_struct *p = tfc->p;

	if (p) {
		/* bail if the target task has since moved to another CPU */
		if (task_cpu(p) != smp_processor_id())
			return;

		/*
		 * Now that we're on the right CPU with IRQs disabled, we can
		 * test whether we hit the right task without races.
		 */
		tfc->ret = -ESRCH; /* No such (running) process */
		if (p != current)
			return;
	}

	tfc->ret = tfc->func(tfc->info);
}
93
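/**
 * task_function_call - call a function on the cpu on which a task runs
 * @p:		the task to evaluate
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Calls the function @func when the task is currently running. This might
 * be on the current CPU, which just calls the function directly.  Retries
 * as long as the IPI or @func report -EAGAIN, e.g. because the task
 * migrated in the meantime.
 *
 * Returns @func's return value, or -ESRCH when the process isn't running.
 */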
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
{
	struct remote_function_call data = {
		.p = p,
		.func = func,
		.info = info,
		.ret = -EAGAIN,
	};
	int ret;

	for (;;) {
		ret = smp_call_function_single(task_cpu(p), remote_function,
					       &data, 1);
		if (!ret)
			ret = data.ret;

		if (ret != -EAGAIN)
			break;

		cond_resched();
	}

	return ret;
}
132
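/**
 * cpu_function_call - call a function on the cpu
 * @cpu:	target cpu to queue this function
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Calls the function @func on the remote cpu.
 *
 * Returns @func's return value, or -ENXIO when the cpu doesn't respond.
 */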
static int cpu_function_call(int cpu, remote_function_f func, void *info)
{
	struct remote_function_call data = {
		.p = NULL,
		.func = func,
		.info = info,
		.ret = -ENXIO,
	};

	smp_call_function_single(cpu, remote_function, &data, 1);

	return data.ret;
}
156
157static inline struct perf_cpu_context *
158__get_cpu_context(struct perf_event_context *ctx)
159{
160 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
161}
162
163static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
164 struct perf_event_context *ctx)
165{
166 raw_spin_lock(&cpuctx->ctx.lock);
167 if (ctx)
168 raw_spin_lock(&ctx->lock);
169}
170
171static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
172 struct perf_event_context *ctx)
173{
174 if (ctx)
175 raw_spin_unlock(&ctx->lock);
176 raw_spin_unlock(&cpuctx->ctx.lock);
177}
178
179#define TASK_TOMBSTONE ((void *)-1L)
180
181static bool is_kernel_event(struct perf_event *event)
182{
183 return READ_ONCE(event->owner) == TASK_TOMBSTONE;
184}
185
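/*
 * On task ctx scheduling:
 *
 * When !ctx->nr_events a task context will not be scheduled. This means
 * we can disable the scheduler hooks (for performance) without leaving
 * pending task ctx state.
 *
 * This however results in two special cases:
 *
 *  - removing the last event from a task ctx; this is relatively straight
 *    forward and is done in __perf_remove_from_context.
 *
 *  - adding the first event to a task ctx; this is tricky because we cannot
 *    rely on ctx->is_active and therefore cannot use event_function_call().
 *    See perf_install_in_context().
 *
 * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
 */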
205typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
206 struct perf_event_context *, void *);
207
208struct event_function_struct {
209 struct perf_event *event;
210 event_f func;
211 void *data;
212};
213
214static int event_function(void *info)
215{
216 struct event_function_struct *efs = info;
217 struct perf_event *event = efs->event;
218 struct perf_event_context *ctx = event->ctx;
219 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
220 struct perf_event_context *task_ctx = cpuctx->task_ctx;
221 int ret = 0;
222
223 lockdep_assert_irqs_disabled();
224
225 perf_ctx_lock(cpuctx, task_ctx);
226
227
228
229
230 if (ctx->task) {
231 if (ctx->task != current) {
232 ret = -ESRCH;
233 goto unlock;
234 }
235
236
237
238
239
240
241
242
243 WARN_ON_ONCE(!ctx->is_active);
244
245
246
247
248 WARN_ON_ONCE(task_ctx != ctx);
249 } else {
250 WARN_ON_ONCE(&cpuctx->ctx != ctx);
251 }
252
253 efs->func(event, cpuctx, ctx, efs->data);
254unlock:
255 perf_ctx_unlock(cpuctx, task_ctx);
256
257 return ret;
258}
259
260static void event_function_call(struct perf_event *event, event_f func, void *data)
261{
262 struct perf_event_context *ctx = event->ctx;
263 struct task_struct *task = READ_ONCE(ctx->task);
264 struct event_function_struct efs = {
265 .event = event,
266 .func = func,
267 .data = data,
268 };
269
270 if (!event->parent) {
271
272
273
274
275
276 lockdep_assert_held(&ctx->mutex);
277 }
278
279 if (!task) {
280 cpu_function_call(event->cpu, event_function, &efs);
281 return;
282 }
283
284 if (task == TASK_TOMBSTONE)
285 return;
286
287again:
288 if (!task_function_call(task, event_function, &efs))
289 return;
290
291 raw_spin_lock_irq(&ctx->lock);
292
293
294
295
296 task = ctx->task;
297 if (task == TASK_TOMBSTONE) {
298 raw_spin_unlock_irq(&ctx->lock);
299 return;
300 }
301 if (ctx->is_active) {
302 raw_spin_unlock_irq(&ctx->lock);
303 goto again;
304 }
305 func(event, NULL, ctx, data);
306 raw_spin_unlock_irq(&ctx->lock);
307}
308
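/*
 * Similar to event_function_call() + event_function(), but hard assumes IRQs
 * are already disabled and we're on the right CPU.
 */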
313static void event_function_local(struct perf_event *event, event_f func, void *data)
314{
315 struct perf_event_context *ctx = event->ctx;
316 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
317 struct task_struct *task = READ_ONCE(ctx->task);
318 struct perf_event_context *task_ctx = NULL;
319
320 lockdep_assert_irqs_disabled();
321
322 if (task) {
323 if (task == TASK_TOMBSTONE)
324 return;
325
326 task_ctx = ctx;
327 }
328
329 perf_ctx_lock(cpuctx, task_ctx);
330
331 task = ctx->task;
332 if (task == TASK_TOMBSTONE)
333 goto unlock;
334
335 if (task) {
336
337
338
339
340
341 if (ctx->is_active) {
342 if (WARN_ON_ONCE(task != current))
343 goto unlock;
344
345 if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
346 goto unlock;
347 }
348 } else {
349 WARN_ON_ONCE(&cpuctx->ctx != ctx);
350 }
351
352 func(event, cpuctx, ctx, data);
353unlock:
354 perf_ctx_unlock(cpuctx, task_ctx);
355}
356
357#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
358 PERF_FLAG_FD_OUTPUT |\
359 PERF_FLAG_PID_CGROUP |\
360 PERF_FLAG_FD_CLOEXEC)
361
362
363
364
365#define PERF_SAMPLE_BRANCH_PERM_PLM \
366 (PERF_SAMPLE_BRANCH_KERNEL |\
367 PERF_SAMPLE_BRANCH_HV)
368
369enum event_type_t {
370 EVENT_FLEXIBLE = 0x1,
371 EVENT_PINNED = 0x2,
372 EVENT_TIME = 0x4,
373
374 EVENT_CPU = 0x8,
375 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
376};
377
378
379
380
381
382
383static void perf_sched_delayed(struct work_struct *work);
384DEFINE_STATIC_KEY_FALSE(perf_sched_events);
385static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
386static DEFINE_MUTEX(perf_sched_mutex);
387static atomic_t perf_sched_count;
388
389static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
390static DEFINE_PER_CPU(int, perf_sched_cb_usages);
391static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
392
393static atomic_t nr_mmap_events __read_mostly;
394static atomic_t nr_comm_events __read_mostly;
395static atomic_t nr_namespaces_events __read_mostly;
396static atomic_t nr_task_events __read_mostly;
397static atomic_t nr_freq_events __read_mostly;
398static atomic_t nr_switch_events __read_mostly;
399static atomic_t nr_ksymbol_events __read_mostly;
400static atomic_t nr_bpf_events __read_mostly;
401static atomic_t nr_cgroup_events __read_mostly;
402static atomic_t nr_text_poke_events __read_mostly;
403static atomic_t nr_build_id_events __read_mostly;
404
405static LIST_HEAD(pmus);
406static DEFINE_MUTEX(pmus_lock);
407static struct srcu_struct pmus_srcu;
408static cpumask_var_t perf_online_mask;
409static struct kmem_cache *perf_event_cache;
410
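/*
 * perf event paranoia level:
 *  -1 - not paranoid at all
 *   0 - disallow raw tracepoint access for unpriv
 *   1 - disallow cpu events for unpriv
 *   2 - disallow kernel profiling for unpriv
 */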
int sysctl_perf_event_paranoid __read_mostly = 2;

/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */

/*
 * max perf event sample rate
 */
426#define DEFAULT_MAX_SAMPLE_RATE 100000
427#define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
428#define DEFAULT_CPU_TIME_MAX_PERCENT 25
429
430int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
431
432static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
433static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
434
435static int perf_sample_allowed_ns __read_mostly =
436 DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
437
438static void update_perf_cpu_limits(void)
439{
440 u64 tmp = perf_sample_period_ns;
441
442 tmp *= sysctl_perf_cpu_time_max_percent;
443 tmp = div_u64(tmp, 100);
444 if (!tmp)
445 tmp = 1;
446
447 WRITE_ONCE(perf_sample_allowed_ns, tmp);
448}
449
450static bool perf_rotate_context(struct perf_cpu_context *cpuctx);
451
452int perf_proc_update_handler(struct ctl_table *table, int write,
453 void *buffer, size_t *lenp, loff_t *ppos)
454{
455 int ret;
456 int perf_cpu = sysctl_perf_cpu_time_max_percent;
457
458
459
460 if (write && (perf_cpu == 100 || perf_cpu == 0))
461 return -EINVAL;
462
463 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
464 if (ret || !write)
465 return ret;
466
467 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
468 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
469 update_perf_cpu_limits();
470
471 return 0;
472}
473
474int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
475
476int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
477 void *buffer, size_t *lenp, loff_t *ppos)
478{
479 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
480
481 if (ret || !write)
482 return ret;
483
484 if (sysctl_perf_cpu_time_max_percent == 100 ||
485 sysctl_perf_cpu_time_max_percent == 0) {
486 printk(KERN_WARNING
487 "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
488 WRITE_ONCE(perf_sample_allowed_ns, 0);
489 } else {
490 update_perf_cpu_limits();
491 }
492
493 return 0;
494}
495
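/*
 * perf samples are done in some very critical code paths (NMIs).
 * If they take too much CPU time, the system can lock up and not
 * get any real work done.  This will drop the sample rate when
 * we detect that events are taking too long.
 */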
502#define NR_ACCUMULATED_SAMPLES 128
503static DEFINE_PER_CPU(u64, running_sample_length);
504
505static u64 __report_avg;
506static u64 __report_allowed;
507
508static void perf_duration_warn(struct irq_work *w)
509{
510 printk_ratelimited(KERN_INFO
511 "perf: interrupt took too long (%lld > %lld), lowering "
512 "kernel.perf_event_max_sample_rate to %d\n",
513 __report_avg, __report_allowed,
514 sysctl_perf_event_sample_rate);
515}
516
517static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
518
519void perf_sample_event_took(u64 sample_len_ns)
520{
521 u64 max_len = READ_ONCE(perf_sample_allowed_ns);
522 u64 running_len;
523 u64 avg_len;
524 u32 max;
525
526 if (max_len == 0)
527 return;
528
529
530 running_len = __this_cpu_read(running_sample_length);
531 running_len -= running_len/NR_ACCUMULATED_SAMPLES;
532 running_len += sample_len_ns;
533 __this_cpu_write(running_sample_length, running_len);
534
535
536
537
538
539
540 avg_len = running_len/NR_ACCUMULATED_SAMPLES;
541 if (avg_len <= max_len)
542 return;
543
544 __report_avg = avg_len;
545 __report_allowed = max_len;
546
547
548
549
550 avg_len += avg_len / 4;
551 max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
552 if (avg_len < max)
553 max /= (u32)avg_len;
554 else
555 max = 1;
556
557 WRITE_ONCE(perf_sample_allowed_ns, avg_len);
558 WRITE_ONCE(max_samples_per_tick, max);
559
560 sysctl_perf_event_sample_rate = max * HZ;
561 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
562
563 if (!irq_work_queue(&perf_duration_work)) {
564 early_printk("perf: interrupt took too long (%lld > %lld), lowering "
565 "kernel.perf_event_max_sample_rate to %d\n",
566 __report_avg, __report_allowed,
567 sysctl_perf_event_sample_rate);
568 }
569}
570
571static atomic64_t perf_event_id;
572
573static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
574 enum event_type_t event_type);
575
576static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
577 enum event_type_t event_type,
578 struct task_struct *task);
579
580static void update_context_time(struct perf_event_context *ctx);
581static u64 perf_event_time(struct perf_event *event);
582
583void __weak perf_event_print_debug(void) { }
584
585static inline u64 perf_clock(void)
586{
587 return local_clock();
588}
589
590static inline u64 perf_event_clock(struct perf_event *event)
591{
592 return event->clock();
593}
594
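/*
 * State based event timekeeping.
 *
 * Enabled/running times are accumulated lazily: event->tstamp records the
 * time of the last state change, and __perf_update_times() folds the elapsed
 * delta into total_time_enabled (while the effective state is >= INACTIVE)
 * and total_time_running (while it is ACTIVE).
 *
 * Because a group leader that is OFF (or in ERROR) keeps its siblings from
 * running, __perf_effective_state() reports the leader's state in that case,
 * so the siblings' clocks stop together with the leader's.
 */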
617static __always_inline enum perf_event_state
618__perf_effective_state(struct perf_event *event)
619{
620 struct perf_event *leader = event->group_leader;
621
622 if (leader->state <= PERF_EVENT_STATE_OFF)
623 return leader->state;
624
625 return event->state;
626}
627
628static __always_inline void
629__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
630{
631 enum perf_event_state state = __perf_effective_state(event);
632 u64 delta = now - event->tstamp;
633
634 *enabled = event->total_time_enabled;
635 if (state >= PERF_EVENT_STATE_INACTIVE)
636 *enabled += delta;
637
638 *running = event->total_time_running;
639 if (state >= PERF_EVENT_STATE_ACTIVE)
640 *running += delta;
641}
642
643static void perf_event_update_time(struct perf_event *event)
644{
645 u64 now = perf_event_time(event);
646
647 __perf_update_times(event, now, &event->total_time_enabled,
648 &event->total_time_running);
649 event->tstamp = now;
650}
651
652static void perf_event_update_sibling_time(struct perf_event *leader)
653{
654 struct perf_event *sibling;
655
656 for_each_sibling_event(sibling, leader)
657 perf_event_update_time(sibling);
658}
659
660static void
661perf_event_set_state(struct perf_event *event, enum perf_event_state state)
662{
663 if (event->state == state)
664 return;
665
666 perf_event_update_time(event);
667
668
669
670
671 if ((event->state < 0) ^ (state < 0))
672 perf_event_update_sibling_time(event);
673
674 WRITE_ONCE(event->state, state);
675}
676
677#ifdef CONFIG_CGROUP_PERF
678
679static inline bool
680perf_cgroup_match(struct perf_event *event)
681{
682 struct perf_event_context *ctx = event->ctx;
683 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
684
685
686 if (!event->cgrp)
687 return true;
688
689
690 if (!cpuctx->cgrp)
691 return false;
692
693
694
695
696
697
698
699 return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
700 event->cgrp->css.cgroup);
701}
702
703static inline void perf_detach_cgroup(struct perf_event *event)
704{
705 css_put(&event->cgrp->css);
706 event->cgrp = NULL;
707}
708
709static inline int is_cgroup_event(struct perf_event *event)
710{
711 return event->cgrp != NULL;
712}
713
714static inline u64 perf_cgroup_event_time(struct perf_event *event)
715{
716 struct perf_cgroup_info *t;
717
718 t = per_cpu_ptr(event->cgrp->info, event->cpu);
719 return t->time;
720}
721
722static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
723{
724 struct perf_cgroup_info *info;
725 u64 now;
726
727 now = perf_clock();
728
729 info = this_cpu_ptr(cgrp->info);
730
731 info->time += now - info->timestamp;
732 info->timestamp = now;
733}
734
735static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
736{
737 struct perf_cgroup *cgrp = cpuctx->cgrp;
738 struct cgroup_subsys_state *css;
739
740 if (cgrp) {
741 for (css = &cgrp->css; css; css = css->parent) {
742 cgrp = container_of(css, struct perf_cgroup, css);
743 __update_cgrp_time(cgrp);
744 }
745 }
746}
747
748static inline void update_cgrp_time_from_event(struct perf_event *event)
749{
750 struct perf_cgroup *cgrp;
751
752
753
754
755
756 if (!is_cgroup_event(event))
757 return;
758
759 cgrp = perf_cgroup_from_task(current, event->ctx);
760
761
762
763 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
764 __update_cgrp_time(event->cgrp);
765}
766
767static inline void
768perf_cgroup_set_timestamp(struct task_struct *task,
769 struct perf_event_context *ctx)
770{
771 struct perf_cgroup *cgrp;
772 struct perf_cgroup_info *info;
773 struct cgroup_subsys_state *css;
774
775
776
777
778
779
780 if (!task || !ctx->nr_cgroups)
781 return;
782
783 cgrp = perf_cgroup_from_task(task, ctx);
784
785 for (css = &cgrp->css; css; css = css->parent) {
786 cgrp = container_of(css, struct perf_cgroup, css);
787 info = this_cpu_ptr(cgrp->info);
788 info->timestamp = ctx->timestamp;
789 }
790}
791
792static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
793
794#define PERF_CGROUP_SWOUT 0x1
795#define PERF_CGROUP_SWIN 0x2
796
797
798
799
800
801
802
803static void perf_cgroup_switch(struct task_struct *task, int mode)
804{
805 struct perf_cpu_context *cpuctx;
806 struct list_head *list;
807 unsigned long flags;
808
809
810
811
812
813 local_irq_save(flags);
814
815 list = this_cpu_ptr(&cgrp_cpuctx_list);
816 list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
817 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
818
819 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
820 perf_pmu_disable(cpuctx->ctx.pmu);
821
822 if (mode & PERF_CGROUP_SWOUT) {
823 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
824
825
826
827
828 cpuctx->cgrp = NULL;
829 }
830
831 if (mode & PERF_CGROUP_SWIN) {
832 WARN_ON_ONCE(cpuctx->cgrp);
833
834
835
836
837
838
839
840 cpuctx->cgrp = perf_cgroup_from_task(task,
841 &cpuctx->ctx);
842 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
843 }
844 perf_pmu_enable(cpuctx->ctx.pmu);
845 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
846 }
847
848 local_irq_restore(flags);
849}
850
851static inline void perf_cgroup_sched_out(struct task_struct *task,
852 struct task_struct *next)
853{
854 struct perf_cgroup *cgrp1;
855 struct perf_cgroup *cgrp2 = NULL;
856
857 rcu_read_lock();
858
859
860
861
862
863 cgrp1 = perf_cgroup_from_task(task, NULL);
864 cgrp2 = perf_cgroup_from_task(next, NULL);
865
866
867
868
869
870
871 if (cgrp1 != cgrp2)
872 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
873
874 rcu_read_unlock();
875}
876
877static inline void perf_cgroup_sched_in(struct task_struct *prev,
878 struct task_struct *task)
879{
880 struct perf_cgroup *cgrp1;
881 struct perf_cgroup *cgrp2 = NULL;
882
883 rcu_read_lock();
884
885
886
887
888
889 cgrp1 = perf_cgroup_from_task(task, NULL);
890 cgrp2 = perf_cgroup_from_task(prev, NULL);
891
892
893
894
895
896
897 if (cgrp1 != cgrp2)
898 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
899
900 rcu_read_unlock();
901}
902
903static int perf_cgroup_ensure_storage(struct perf_event *event,
904 struct cgroup_subsys_state *css)
905{
906 struct perf_cpu_context *cpuctx;
907 struct perf_event **storage;
908 int cpu, heap_size, ret = 0;
909
910
911
912
913
914 for (heap_size = 1; css; css = css->parent)
915 heap_size++;
916
917 for_each_possible_cpu(cpu) {
918 cpuctx = per_cpu_ptr(event->pmu->pmu_cpu_context, cpu);
919 if (heap_size <= cpuctx->heap_size)
920 continue;
921
922 storage = kmalloc_node(heap_size * sizeof(struct perf_event *),
923 GFP_KERNEL, cpu_to_node(cpu));
924 if (!storage) {
925 ret = -ENOMEM;
926 break;
927 }
928
929 raw_spin_lock_irq(&cpuctx->ctx.lock);
930 if (cpuctx->heap_size < heap_size) {
931 swap(cpuctx->heap, storage);
932 if (storage == cpuctx->heap_default)
933 storage = NULL;
934 cpuctx->heap_size = heap_size;
935 }
936 raw_spin_unlock_irq(&cpuctx->ctx.lock);
937
938 kfree(storage);
939 }
940
941 return ret;
942}
943
944static inline int perf_cgroup_connect(int fd, struct perf_event *event,
945 struct perf_event_attr *attr,
946 struct perf_event *group_leader)
947{
948 struct perf_cgroup *cgrp;
949 struct cgroup_subsys_state *css;
950 struct fd f = fdget(fd);
951 int ret = 0;
952
953 if (!f.file)
954 return -EBADF;
955
956 css = css_tryget_online_from_dir(f.file->f_path.dentry,
957 &perf_event_cgrp_subsys);
958 if (IS_ERR(css)) {
959 ret = PTR_ERR(css);
960 goto out;
961 }
962
963 ret = perf_cgroup_ensure_storage(event, css);
964 if (ret)
965 goto out;
966
967 cgrp = container_of(css, struct perf_cgroup, css);
968 event->cgrp = cgrp;
969
970
971
972
973
974
975 if (group_leader && group_leader->cgrp != cgrp) {
976 perf_detach_cgroup(event);
977 ret = -EINVAL;
978 }
979out:
980 fdput(f);
981 return ret;
982}
983
984static inline void
985perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
986{
987 struct perf_cgroup_info *t;
988 t = per_cpu_ptr(event->cgrp->info, event->cpu);
989 event->shadow_ctx_time = now - t->timestamp;
990}
991
992static inline void
993perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
994{
995 struct perf_cpu_context *cpuctx;
996
997 if (!is_cgroup_event(event))
998 return;
999
1000
1001
1002
1003
1004 cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
1005
1006
1007
1008
1009
1010
1011
1012 if (ctx->is_active && !cpuctx->cgrp) {
1013 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
1014
1015 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
1016 cpuctx->cgrp = cgrp;
1017 }
1018
1019 if (ctx->nr_cgroups++)
1020 return;
1021
1022 list_add(&cpuctx->cgrp_cpuctx_entry,
1023 per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
1024}
1025
1026static inline void
1027perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
1028{
1029 struct perf_cpu_context *cpuctx;
1030
1031 if (!is_cgroup_event(event))
1032 return;
1033
1034
1035
1036
1037
1038 cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
1039
1040 if (--ctx->nr_cgroups)
1041 return;
1042
1043 if (ctx->is_active && cpuctx->cgrp)
1044 cpuctx->cgrp = NULL;
1045
1046 list_del(&cpuctx->cgrp_cpuctx_entry);
1047}
1048
1049#else
1050
1051static inline bool
1052perf_cgroup_match(struct perf_event *event)
1053{
1054 return true;
1055}
1056
1057static inline void perf_detach_cgroup(struct perf_event *event)
1058{}
1059
1060static inline int is_cgroup_event(struct perf_event *event)
1061{
1062 return 0;
1063}
1064
1065static inline void update_cgrp_time_from_event(struct perf_event *event)
1066{
1067}
1068
1069static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
1070{
1071}
1072
1073static inline void perf_cgroup_sched_out(struct task_struct *task,
1074 struct task_struct *next)
1075{
1076}
1077
1078static inline void perf_cgroup_sched_in(struct task_struct *prev,
1079 struct task_struct *task)
1080{
1081}
1082
1083static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
1084 struct perf_event_attr *attr,
1085 struct perf_event *group_leader)
1086{
1087 return -EINVAL;
1088}
1089
1090static inline void
1091perf_cgroup_set_timestamp(struct task_struct *task,
1092 struct perf_event_context *ctx)
1093{
1094}
1095
1096static inline void
1097perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
1098{
1099}
1100
1101static inline void
1102perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
1103{
1104}
1105
1106static inline u64 perf_cgroup_event_time(struct perf_event *event)
1107{
1108 return 0;
1109}
1110
1111static inline void
1112perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
1113{
1114}
1115
1116static inline void
1117perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
1118{
1119}
1120#endif
1121
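/*
 * Default interval for the multiplexing (rotation) hrtimer, in milliseconds:
 * 1000/HZ ms, i.e. roughly once per tick.
 */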
1126#define PERF_CPU_HRTIMER (1000 / HZ)
1127
1128
1129
1130static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
1131{
1132 struct perf_cpu_context *cpuctx;
1133 bool rotations;
1134
1135 lockdep_assert_irqs_disabled();
1136
1137 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
1138 rotations = perf_rotate_context(cpuctx);
1139
1140 raw_spin_lock(&cpuctx->hrtimer_lock);
1141 if (rotations)
1142 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
1143 else
1144 cpuctx->hrtimer_active = 0;
1145 raw_spin_unlock(&cpuctx->hrtimer_lock);
1146
1147 return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
1148}
1149
1150static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
1151{
1152 struct hrtimer *timer = &cpuctx->hrtimer;
1153 struct pmu *pmu = cpuctx->ctx.pmu;
1154 u64 interval;
1155
1156
1157 if (pmu->task_ctx_nr == perf_sw_context)
1158 return;
1159
1160
1161
1162
1163
1164 interval = pmu->hrtimer_interval_ms;
1165 if (interval < 1)
1166 interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
1167
1168 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
1169
1170 raw_spin_lock_init(&cpuctx->hrtimer_lock);
1171 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1172 timer->function = perf_mux_hrtimer_handler;
1173}
1174
1175static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
1176{
1177 struct hrtimer *timer = &cpuctx->hrtimer;
1178 struct pmu *pmu = cpuctx->ctx.pmu;
1179 unsigned long flags;
1180
1181
1182 if (pmu->task_ctx_nr == perf_sw_context)
1183 return 0;
1184
1185 raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
1186 if (!cpuctx->hrtimer_active) {
1187 cpuctx->hrtimer_active = 1;
1188 hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
1189 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
1190 }
1191 raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
1192
1193 return 0;
1194}
1195
1196void perf_pmu_disable(struct pmu *pmu)
1197{
1198 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1199 if (!(*count)++)
1200 pmu->pmu_disable(pmu);
1201}
1202
1203void perf_pmu_enable(struct pmu *pmu)
1204{
1205 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1206 if (!--(*count))
1207 pmu->pmu_enable(pmu);
1208}
1209
1210static DEFINE_PER_CPU(struct list_head, active_ctx_list);
1211
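/*
 * Per-CPU list of contexts that currently have active events.  The list is
 * only manipulated from the local CPU with IRQs disabled, which serializes
 * it against the places that walk it (e.g. the timer tick).
 */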
1218static void perf_event_ctx_activate(struct perf_event_context *ctx)
1219{
1220 struct list_head *head = this_cpu_ptr(&active_ctx_list);
1221
1222 lockdep_assert_irqs_disabled();
1223
1224 WARN_ON(!list_empty(&ctx->active_ctx_list));
1225
1226 list_add(&ctx->active_ctx_list, head);
1227}
1228
1229static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
1230{
1231 lockdep_assert_irqs_disabled();
1232
1233 WARN_ON(list_empty(&ctx->active_ctx_list));
1234
1235 list_del_init(&ctx->active_ctx_list);
1236}
1237
1238static void get_ctx(struct perf_event_context *ctx)
1239{
1240 refcount_inc(&ctx->refcount);
1241}
1242
1243static void *alloc_task_ctx_data(struct pmu *pmu)
1244{
1245 if (pmu->task_ctx_cache)
1246 return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL);
1247
1248 return NULL;
1249}
1250
1251static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
1252{
1253 if (pmu->task_ctx_cache && task_ctx_data)
1254 kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
1255}
1256
1257static void free_ctx(struct rcu_head *head)
1258{
1259 struct perf_event_context *ctx;
1260
1261 ctx = container_of(head, struct perf_event_context, rcu_head);
1262 free_task_ctx_data(ctx->pmu, ctx->task_ctx_data);
1263 kfree(ctx);
1264}
1265
1266static void put_ctx(struct perf_event_context *ctx)
1267{
1268 if (refcount_dec_and_test(&ctx->refcount)) {
1269 if (ctx->parent_ctx)
1270 put_ctx(ctx->parent_ctx);
1271 if (ctx->task && ctx->task != TASK_TOMBSTONE)
1272 put_task_struct(ctx->task);
1273 call_rcu(&ctx->rcu_head, free_ctx);
1274 }
1275}
1276
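/*
 * perf_event::ctx can change: sys_perf_event_open()'s move_group and
 * perf_pmu_migrate_context() re-home events into a different context by
 * doing:
 *
 *   perf_remove_from_context();
 *   synchronize_rcu();
 *   perf_install_in_context();
 *
 * perf_event_ctx_lock_nested() below therefore takes a reference on the
 * context it observed, acquires ctx->mutex and then re-checks that
 * event->ctx still points at it, retrying otherwise.  Holding ctx->mutex
 * (or ctx->lock) pins event->ctx.
 *
 * Lock ordering (outermost first), as used throughout this file:
 *
 *	task_struct::perf_event_mutex
 *	  perf_event_context::mutex
 *	    perf_event::child_mutex
 *	      perf_event_context::lock
 *	    perf_event::mmap_mutex
 *	    mmap_lock
 *	      perf_addr_filters_head::lock
 *
 *	cpu_hotplug_lock
 *	  pmus_lock
 *	    cpuctx->mutex / perf_event_context::mutex
 */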
1343static struct perf_event_context *
1344perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
1345{
1346 struct perf_event_context *ctx;
1347
1348again:
1349 rcu_read_lock();
1350 ctx = READ_ONCE(event->ctx);
1351 if (!refcount_inc_not_zero(&ctx->refcount)) {
1352 rcu_read_unlock();
1353 goto again;
1354 }
1355 rcu_read_unlock();
1356
1357 mutex_lock_nested(&ctx->mutex, nesting);
1358 if (event->ctx != ctx) {
1359 mutex_unlock(&ctx->mutex);
1360 put_ctx(ctx);
1361 goto again;
1362 }
1363
1364 return ctx;
1365}
1366
1367static inline struct perf_event_context *
1368perf_event_ctx_lock(struct perf_event *event)
1369{
1370 return perf_event_ctx_lock_nested(event, 0);
1371}
1372
1373static void perf_event_ctx_unlock(struct perf_event *event,
1374 struct perf_event_context *ctx)
1375{
1376 mutex_unlock(&ctx->mutex);
1377 put_ctx(ctx);
1378}
1379
1380
1381
1382
1383
1384
1385static __must_check struct perf_event_context *
1386unclone_ctx(struct perf_event_context *ctx)
1387{
1388 struct perf_event_context *parent_ctx = ctx->parent_ctx;
1389
1390 lockdep_assert_held(&ctx->lock);
1391
1392 if (parent_ctx)
1393 ctx->parent_ctx = NULL;
1394 ctx->generation++;
1395
1396 return parent_ctx;
1397}
1398
1399static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
1400 enum pid_type type)
1401{
1402 u32 nr;
1403
1404
1405
1406 if (event->parent)
1407 event = event->parent;
1408
1409 nr = __task_pid_nr_ns(p, type, event->ns);
1410
1411 if (!nr && !pid_alive(p))
1412 nr = -1;
1413 return nr;
1414}
1415
1416static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
1417{
1418 return perf_event_pid_type(event, p, PIDTYPE_TGID);
1419}
1420
1421static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
1422{
1423 return perf_event_pid_type(event, p, PIDTYPE_PID);
1424}
1425
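/*
 * If we inherit events we want to return the parent event id
 * to userspace.
 */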
1430static u64 primary_event_id(struct perf_event *event)
1431{
1432 u64 id = event->id;
1433
1434 if (event->parent)
1435 id = event->parent->id;
1436
1437 return id;
1438}
1439
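/*
 * Get the perf_event_context for a task and lock it.
 *
 * This has to cope with the fact that until it is locked,
 * the context could get moved to another task.
 */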
1446static struct perf_event_context *
1447perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
1448{
1449 struct perf_event_context *ctx;
1450
1451retry:
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461 local_irq_save(*flags);
1462 rcu_read_lock();
1463 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
1464 if (ctx) {
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475 raw_spin_lock(&ctx->lock);
1476 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
1477 raw_spin_unlock(&ctx->lock);
1478 rcu_read_unlock();
1479 local_irq_restore(*flags);
1480 goto retry;
1481 }
1482
1483 if (ctx->task == TASK_TOMBSTONE ||
1484 !refcount_inc_not_zero(&ctx->refcount)) {
1485 raw_spin_unlock(&ctx->lock);
1486 ctx = NULL;
1487 } else {
1488 WARN_ON_ONCE(ctx->task != task);
1489 }
1490 }
1491 rcu_read_unlock();
1492 if (!ctx)
1493 local_irq_restore(*flags);
1494 return ctx;
1495}
1496
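/*
 * Get the context for a task and increment its pin_count so it
 * can't get swapped to another task.  This also increments its
 * ref count.
 */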
1502static struct perf_event_context *
1503perf_pin_task_context(struct task_struct *task, int ctxn)
1504{
1505 struct perf_event_context *ctx;
1506 unsigned long flags;
1507
1508 ctx = perf_lock_task_context(task, ctxn, &flags);
1509 if (ctx) {
1510 ++ctx->pin_count;
1511 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1512 }
1513 return ctx;
1514}
1515
1516static void perf_unpin_context(struct perf_event_context *ctx)
1517{
1518 unsigned long flags;
1519
1520 raw_spin_lock_irqsave(&ctx->lock, flags);
1521 --ctx->pin_count;
1522 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1523}
1524
1525
1526
1527
1528static void update_context_time(struct perf_event_context *ctx)
1529{
1530 u64 now = perf_clock();
1531
1532 ctx->time += now - ctx->timestamp;
1533 ctx->timestamp = now;
1534}
1535
1536static u64 perf_event_time(struct perf_event *event)
1537{
1538 struct perf_event_context *ctx = event->ctx;
1539
1540 if (is_cgroup_event(event))
1541 return perf_cgroup_event_time(event);
1542
1543 return ctx ? ctx->time : 0;
1544}
1545
1546static enum event_type_t get_event_type(struct perf_event *event)
1547{
1548 struct perf_event_context *ctx = event->ctx;
1549 enum event_type_t event_type;
1550
1551 lockdep_assert_held(&ctx->lock);
1552
1553
1554
1555
1556
1557 if (event->group_leader != event)
1558 event = event->group_leader;
1559
1560 event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
1561 if (!ctx->task)
1562 event_type |= EVENT_CPU;
1563
1564 return event_type;
1565}
1566
1567
1568
1569
1570static void init_event_group(struct perf_event *event)
1571{
1572 RB_CLEAR_NODE(&event->group_node);
1573 event->group_index = 0;
1574}
1575
1576
1577
1578
1579
1580static struct perf_event_groups *
1581get_event_groups(struct perf_event *event, struct perf_event_context *ctx)
1582{
1583 if (event->attr.pinned)
1584 return &ctx->pinned_groups;
1585 else
1586 return &ctx->flexible_groups;
1587}
1588
1589
1590
1591
1592static void perf_event_groups_init(struct perf_event_groups *groups)
1593{
1594 groups->tree = RB_ROOT;
1595 groups->index = 0;
1596}
1597
1598static inline struct cgroup *event_cgroup(const struct perf_event *event)
1599{
1600 struct cgroup *cgroup = NULL;
1601
1602#ifdef CONFIG_CGROUP_PERF
1603 if (event->cgrp)
1604 cgroup = event->cgrp->css.cgroup;
1605#endif
1606
1607 return cgroup;
1608}
1609
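/*
 * Compare function for event groups; orders the events in the per-context
 * RB trees by CPU first, then (with CONFIG_CGROUP_PERF) by cgroup id, then
 * by the monotonically increasing group_index.
 */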
1616static __always_inline int
1617perf_event_groups_cmp(const int left_cpu, const struct cgroup *left_cgroup,
1618 const u64 left_group_index, const struct perf_event *right)
1619{
1620 if (left_cpu < right->cpu)
1621 return -1;
1622 if (left_cpu > right->cpu)
1623 return 1;
1624
1625#ifdef CONFIG_CGROUP_PERF
1626 {
1627 const struct cgroup *right_cgroup = event_cgroup(right);
1628
1629 if (left_cgroup != right_cgroup) {
1630 if (!left_cgroup) {
1631
1632
1633
1634
1635 return -1;
1636 }
1637 if (!right_cgroup) {
1638
1639
1640
1641
1642 return 1;
1643 }
1644
1645 if (cgroup_id(left_cgroup) < cgroup_id(right_cgroup))
1646 return -1;
1647
1648 return 1;
1649 }
1650 }
1651#endif
1652
1653 if (left_group_index < right->group_index)
1654 return -1;
1655 if (left_group_index > right->group_index)
1656 return 1;
1657
1658 return 0;
1659}
1660
1661#define __node_2_pe(node) \
1662 rb_entry((node), struct perf_event, group_node)
1663
1664static inline bool __group_less(struct rb_node *a, const struct rb_node *b)
1665{
1666 struct perf_event *e = __node_2_pe(a);
1667 return perf_event_groups_cmp(e->cpu, event_cgroup(e), e->group_index,
1668 __node_2_pe(b)) < 0;
1669}
1670
1671struct __group_key {
1672 int cpu;
1673 struct cgroup *cgroup;
1674};
1675
1676static inline int __group_cmp(const void *key, const struct rb_node *node)
1677{
1678 const struct __group_key *a = key;
1679 const struct perf_event *b = __node_2_pe(node);
1680
1681
1682 return perf_event_groups_cmp(a->cpu, a->cgroup, b->group_index, b);
1683}
1684
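/*
 * Insert @event into @groups' tree; using {event->cpu, event_cgroup(event),
 * ++@groups->index} for the key.  The always-increasing index keeps events
 * with equal {cpu, cgroup} in insertion order.
 */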
1690static void
1691perf_event_groups_insert(struct perf_event_groups *groups,
1692 struct perf_event *event)
1693{
1694 event->group_index = ++groups->index;
1695
1696 rb_add(&event->group_node, &groups->tree, __group_less);
1697}
1698
1699
1700
1701
1702static void
1703add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx)
1704{
1705 struct perf_event_groups *groups;
1706
1707 groups = get_event_groups(event, ctx);
1708 perf_event_groups_insert(groups, event);
1709}
1710
1711
1712
1713
1714static void
1715perf_event_groups_delete(struct perf_event_groups *groups,
1716 struct perf_event *event)
1717{
1718 WARN_ON_ONCE(RB_EMPTY_NODE(&event->group_node) ||
1719 RB_EMPTY_ROOT(&groups->tree));
1720
1721 rb_erase(&event->group_node, &groups->tree);
1722 init_event_group(event);
1723}
1724
1725
1726
1727
1728static void
1729del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
1730{
1731 struct perf_event_groups *groups;
1732
1733 groups = get_event_groups(event, ctx);
1734 perf_event_groups_delete(groups, event);
1735}
1736
1737
1738
1739
1740static struct perf_event *
1741perf_event_groups_first(struct perf_event_groups *groups, int cpu,
1742 struct cgroup *cgrp)
1743{
1744 struct __group_key key = {
1745 .cpu = cpu,
1746 .cgroup = cgrp,
1747 };
1748 struct rb_node *node;
1749
1750 node = rb_find_first(&key, &groups->tree, __group_cmp);
1751 if (node)
1752 return __node_2_pe(node);
1753
1754 return NULL;
1755}
1756
1757
1758
1759
1760static struct perf_event *
1761perf_event_groups_next(struct perf_event *event)
1762{
1763 struct __group_key key = {
1764 .cpu = event->cpu,
1765 .cgroup = event_cgroup(event),
1766 };
1767 struct rb_node *next;
1768
1769 next = rb_next_match(&key, &event->group_node, __group_cmp);
1770 if (next)
1771 return __node_2_pe(next);
1772
1773 return NULL;
1774}
1775
1776
1777
1778
1779#define perf_event_groups_for_each(event, groups) \
1780 for (event = rb_entry_safe(rb_first(&((groups)->tree)), \
1781 typeof(*event), group_node); event; \
1782 event = rb_entry_safe(rb_next(&event->group_node), \
1783 typeof(*event), group_node))
1784
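/*
 * Add an event to the lists for its context.
 * Must be called with ctx->mutex and ctx->lock held.
 */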
1789static void
1790list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1791{
1792 lockdep_assert_held(&ctx->lock);
1793
1794 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
1795 event->attach_state |= PERF_ATTACH_CONTEXT;
1796
1797 event->tstamp = perf_event_time(event);
1798
1799
1800
1801
1802
1803
1804 if (event->group_leader == event) {
1805 event->group_caps = event->event_caps;
1806 add_event_to_groups(event, ctx);
1807 }
1808
1809 list_add_rcu(&event->event_entry, &ctx->event_list);
1810 ctx->nr_events++;
1811 if (event->attr.inherit_stat)
1812 ctx->nr_stat++;
1813
1814 if (event->state > PERF_EVENT_STATE_OFF)
1815 perf_cgroup_event_enable(event, ctx);
1816
1817 ctx->generation++;
1818}
1819
1820
1821
1822
1823static inline void perf_event__state_init(struct perf_event *event)
1824{
1825 event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
1826 PERF_EVENT_STATE_INACTIVE;
1827}
1828
1829static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
1830{
1831 int entry = sizeof(u64);
1832 int size = 0;
1833 int nr = 1;
1834
1835 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1836 size += sizeof(u64);
1837
1838 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1839 size += sizeof(u64);
1840
1841 if (event->attr.read_format & PERF_FORMAT_ID)
1842 entry += sizeof(u64);
1843
1844 if (event->attr.read_format & PERF_FORMAT_GROUP) {
1845 nr += nr_siblings;
1846 size += sizeof(u64);
1847 }
1848
1849 size += entry * nr;
1850 event->read_size = size;
1851}
1852
1853static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
1854{
1855 struct perf_sample_data *data;
1856 u16 size = 0;
1857
1858 if (sample_type & PERF_SAMPLE_IP)
1859 size += sizeof(data->ip);
1860
1861 if (sample_type & PERF_SAMPLE_ADDR)
1862 size += sizeof(data->addr);
1863
1864 if (sample_type & PERF_SAMPLE_PERIOD)
1865 size += sizeof(data->period);
1866
1867 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
1868 size += sizeof(data->weight.full);
1869
1870 if (sample_type & PERF_SAMPLE_READ)
1871 size += event->read_size;
1872
1873 if (sample_type & PERF_SAMPLE_DATA_SRC)
1874 size += sizeof(data->data_src.val);
1875
1876 if (sample_type & PERF_SAMPLE_TRANSACTION)
1877 size += sizeof(data->txn);
1878
1879 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
1880 size += sizeof(data->phys_addr);
1881
1882 if (sample_type & PERF_SAMPLE_CGROUP)
1883 size += sizeof(data->cgroup);
1884
1885 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
1886 size += sizeof(data->data_page_size);
1887
1888 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
1889 size += sizeof(data->code_page_size);
1890
1891 event->header_size = size;
1892}
1893
1894
1895
1896
1897
1898static void perf_event__header_size(struct perf_event *event)
1899{
1900 __perf_event_read_size(event,
1901 event->group_leader->nr_siblings);
1902 __perf_event_header_size(event, event->attr.sample_type);
1903}
1904
1905static void perf_event__id_header_size(struct perf_event *event)
1906{
1907 struct perf_sample_data *data;
1908 u64 sample_type = event->attr.sample_type;
1909 u16 size = 0;
1910
1911 if (sample_type & PERF_SAMPLE_TID)
1912 size += sizeof(data->tid_entry);
1913
1914 if (sample_type & PERF_SAMPLE_TIME)
1915 size += sizeof(data->time);
1916
1917 if (sample_type & PERF_SAMPLE_IDENTIFIER)
1918 size += sizeof(data->id);
1919
1920 if (sample_type & PERF_SAMPLE_ID)
1921 size += sizeof(data->id);
1922
1923 if (sample_type & PERF_SAMPLE_STREAM_ID)
1924 size += sizeof(data->stream_id);
1925
1926 if (sample_type & PERF_SAMPLE_CPU)
1927 size += sizeof(data->cpu_entry);
1928
1929 event->id_header_size = size;
1930}
1931
1932static bool perf_event_validate_size(struct perf_event *event)
1933{
1934
1935
1936
1937
1938 __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
1939 __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
1940 perf_event__id_header_size(event);
1941
1942
1943
1944
1945
1946 if (event->read_size + event->header_size +
1947 event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
1948 return false;
1949
1950 return true;
1951}
1952
1953static void perf_group_attach(struct perf_event *event)
1954{
1955 struct perf_event *group_leader = event->group_leader, *pos;
1956
1957 lockdep_assert_held(&event->ctx->lock);
1958
1959
1960
1961
1962 if (event->attach_state & PERF_ATTACH_GROUP)
1963 return;
1964
1965 event->attach_state |= PERF_ATTACH_GROUP;
1966
1967 if (group_leader == event)
1968 return;
1969
1970 WARN_ON_ONCE(group_leader->ctx != event->ctx);
1971
1972 group_leader->group_caps &= event->event_caps;
1973
1974 list_add_tail(&event->sibling_list, &group_leader->sibling_list);
1975 group_leader->nr_siblings++;
1976
1977 perf_event__header_size(group_leader);
1978
1979 for_each_sibling_event(pos, group_leader)
1980 perf_event__header_size(pos);
1981}
1982
1983
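/*
 * Remove an event from the lists for its context.
 * Must be called with ctx->mutex and ctx->lock held.
 */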
1987static void
1988list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1989{
1990 WARN_ON_ONCE(event->ctx != ctx);
1991 lockdep_assert_held(&ctx->lock);
1992
1993
1994
1995
1996 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
1997 return;
1998
1999 event->attach_state &= ~PERF_ATTACH_CONTEXT;
2000
2001 ctx->nr_events--;
2002 if (event->attr.inherit_stat)
2003 ctx->nr_stat--;
2004
2005 list_del_rcu(&event->event_entry);
2006
2007 if (event->group_leader == event)
2008 del_event_from_groups(event, ctx);
2009
2010
2011
2012
2013
2014
2015
2016
2017 if (event->state > PERF_EVENT_STATE_OFF) {
2018 perf_cgroup_event_disable(event, ctx);
2019 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2020 }
2021
2022 ctx->generation++;
2023}
2024
2025static int
2026perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
2027{
2028 if (!has_aux(aux_event))
2029 return 0;
2030
2031 if (!event->pmu->aux_output_match)
2032 return 0;
2033
2034 return event->pmu->aux_output_match(aux_event);
2035}
2036
2037static void put_event(struct perf_event *event);
2038static void event_sched_out(struct perf_event *event,
2039 struct perf_cpu_context *cpuctx,
2040 struct perf_event_context *ctx);
2041
2042static void perf_put_aux_event(struct perf_event *event)
2043{
2044 struct perf_event_context *ctx = event->ctx;
2045 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2046 struct perf_event *iter;
2047
2048
2049
2050
2051 if (event->aux_event) {
2052 iter = event->aux_event;
2053 event->aux_event = NULL;
2054 put_event(iter);
2055 return;
2056 }
2057
2058
2059
2060
2061
2062 for_each_sibling_event(iter, event->group_leader) {
2063 if (iter->aux_event != event)
2064 continue;
2065
2066 iter->aux_event = NULL;
2067 put_event(event);
2068
2069
2070
2071
2072
2073
2074 event_sched_out(iter, cpuctx, ctx);
2075 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
2076 }
2077}
2078
2079static bool perf_need_aux_event(struct perf_event *event)
2080{
2081 return !!event->attr.aux_output || !!event->attr.aux_sample_size;
2082}
2083
2084static int perf_get_aux_event(struct perf_event *event,
2085 struct perf_event *group_leader)
2086{
2087
2088
2089
2090
2091
2092
2093 if (!group_leader)
2094 return 0;
2095
2096
2097
2098
2099 if (event->attr.aux_output && event->attr.aux_sample_size)
2100 return 0;
2101
2102 if (event->attr.aux_output &&
2103 !perf_aux_output_match(event, group_leader))
2104 return 0;
2105
2106 if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
2107 return 0;
2108
2109 if (!atomic_long_inc_not_zero(&group_leader->refcount))
2110 return 0;
2111
2112
2113
2114
2115
2116
2117
2118 event->aux_event = group_leader;
2119
2120 return 1;
2121}
2122
2123static inline struct list_head *get_event_list(struct perf_event *event)
2124{
2125 struct perf_event_context *ctx = event->ctx;
2126 return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
2127}
2128
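/*
 * Events that have PERF_EV_CAP_SIBLING require being part of a group and
 * cannot exist on their own; when the group is torn down they are scheduled
 * out and moved into the ERROR state, from which they cannot be re-enabled.
 */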
2135static inline void perf_remove_sibling_event(struct perf_event *event)
2136{
2137 struct perf_event_context *ctx = event->ctx;
2138 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2139
2140 event_sched_out(event, cpuctx, ctx);
2141 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
2142}
2143
2144static void perf_group_detach(struct perf_event *event)
2145{
2146 struct perf_event *leader = event->group_leader;
2147 struct perf_event *sibling, *tmp;
2148 struct perf_event_context *ctx = event->ctx;
2149
2150 lockdep_assert_held(&ctx->lock);
2151
2152
2153
2154
2155 if (!(event->attach_state & PERF_ATTACH_GROUP))
2156 return;
2157
2158 event->attach_state &= ~PERF_ATTACH_GROUP;
2159
2160 perf_put_aux_event(event);
2161
2162
2163
2164
2165 if (leader != event) {
2166 list_del_init(&event->sibling_list);
2167 event->group_leader->nr_siblings--;
2168 goto out;
2169 }
2170
2171
2172
2173
2174
2175
2176 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
2177
2178 if (sibling->event_caps & PERF_EV_CAP_SIBLING)
2179 perf_remove_sibling_event(sibling);
2180
2181 sibling->group_leader = sibling;
2182 list_del_init(&sibling->sibling_list);
2183
2184
2185 sibling->group_caps = event->group_caps;
2186
2187 if (!RB_EMPTY_NODE(&event->group_node)) {
2188 add_event_to_groups(sibling, event->ctx);
2189
2190 if (sibling->state == PERF_EVENT_STATE_ACTIVE)
2191 list_add_tail(&sibling->active_list, get_event_list(sibling));
2192 }
2193
2194 WARN_ON_ONCE(sibling->ctx != event->ctx);
2195 }
2196
2197out:
2198 for_each_sibling_event(tmp, leader)
2199 perf_event__header_size(tmp);
2200
2201 perf_event__header_size(leader);
2202}
2203
2204static void sync_child_event(struct perf_event *child_event);
2205
2206static void perf_child_detach(struct perf_event *event)
2207{
2208 struct perf_event *parent_event = event->parent;
2209
2210 if (!(event->attach_state & PERF_ATTACH_CHILD))
2211 return;
2212
2213 event->attach_state &= ~PERF_ATTACH_CHILD;
2214
2215 if (WARN_ON_ONCE(!parent_event))
2216 return;
2217
2218 lockdep_assert_held(&parent_event->child_mutex);
2219
2220 sync_child_event(event);
2221 list_del_init(&event->child_list);
2222}
2223
2224static bool is_orphaned_event(struct perf_event *event)
2225{
2226 return event->state == PERF_EVENT_STATE_DEAD;
2227}
2228
2229static inline int __pmu_filter_match(struct perf_event *event)
2230{
2231 struct pmu *pmu = event->pmu;
2232 return pmu->filter_match ? pmu->filter_match(event) : 1;
2233}
2234
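/*
 * Check whether the event and all of its siblings pass the PMU's
 * filter_match callback: a group can only be scheduled as a unit, so if any
 * member fails the filter the whole group is skipped.
 */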
2241static inline int pmu_filter_match(struct perf_event *event)
2242{
2243 struct perf_event *sibling;
2244
2245 if (!__pmu_filter_match(event))
2246 return 0;
2247
2248 for_each_sibling_event(sibling, event) {
2249 if (!__pmu_filter_match(sibling))
2250 return 0;
2251 }
2252
2253 return 1;
2254}
2255
2256static inline int
2257event_filter_match(struct perf_event *event)
2258{
2259 return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
2260 perf_cgroup_match(event) && pmu_filter_match(event);
2261}
2262
2263static void
2264event_sched_out(struct perf_event *event,
2265 struct perf_cpu_context *cpuctx,
2266 struct perf_event_context *ctx)
2267{
2268 enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
2269
2270 WARN_ON_ONCE(event->ctx != ctx);
2271 lockdep_assert_held(&ctx->lock);
2272
2273 if (event->state != PERF_EVENT_STATE_ACTIVE)
2274 return;
2275
2276
2277
2278
2279
2280
2281 list_del_init(&event->active_list);
2282
2283 perf_pmu_disable(event->pmu);
2284
2285 event->pmu->del(event, 0);
2286 event->oncpu = -1;
2287
2288 if (READ_ONCE(event->pending_disable) >= 0) {
2289 WRITE_ONCE(event->pending_disable, -1);
2290 perf_cgroup_event_disable(event, ctx);
2291 state = PERF_EVENT_STATE_OFF;
2292 }
2293 perf_event_set_state(event, state);
2294
2295 if (!is_software_event(event))
2296 cpuctx->active_oncpu--;
2297 if (!--ctx->nr_active)
2298 perf_event_ctx_deactivate(ctx);
2299 if (event->attr.freq && event->attr.sample_freq)
2300 ctx->nr_freq--;
2301 if (event->attr.exclusive || !cpuctx->active_oncpu)
2302 cpuctx->exclusive = 0;
2303
2304 perf_pmu_enable(event->pmu);
2305}
2306
2307static void
2308group_sched_out(struct perf_event *group_event,
2309 struct perf_cpu_context *cpuctx,
2310 struct perf_event_context *ctx)
2311{
2312 struct perf_event *event;
2313
2314 if (group_event->state != PERF_EVENT_STATE_ACTIVE)
2315 return;
2316
2317 perf_pmu_disable(ctx->pmu);
2318
2319 event_sched_out(group_event, cpuctx, ctx);
2320
2321
2322
2323
2324 for_each_sibling_event(event, group_event)
2325 event_sched_out(event, cpuctx, ctx);
2326
2327 perf_pmu_enable(ctx->pmu);
2328}
2329
2330#define DETACH_GROUP 0x01UL
2331#define DETACH_CHILD 0x02UL
2332
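/*
 * Cross CPU call to remove a performance event.
 *
 * We disable the event on the hardware level first. After that we
 * remove it from the context list.
 */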
2339static void
2340__perf_remove_from_context(struct perf_event *event,
2341 struct perf_cpu_context *cpuctx,
2342 struct perf_event_context *ctx,
2343 void *info)
2344{
2345 unsigned long flags = (unsigned long)info;
2346
2347 if (ctx->is_active & EVENT_TIME) {
2348 update_context_time(ctx);
2349 update_cgrp_time_from_cpuctx(cpuctx);
2350 }
2351
2352 event_sched_out(event, cpuctx, ctx);
2353 if (flags & DETACH_GROUP)
2354 perf_group_detach(event);
2355 if (flags & DETACH_CHILD)
2356 perf_child_detach(event);
2357 list_del_event(event, ctx);
2358
2359 if (!ctx->nr_events && ctx->is_active) {
2360 ctx->is_active = 0;
2361 ctx->rotate_necessary = 0;
2362 if (ctx->task) {
2363 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
2364 cpuctx->task_ctx = NULL;
2365 }
2366 }
2367}
2368
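/*
 * Remove the event from a task's (or a CPU's) list of events.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This is OK when called from perf_release since
 * that only calls us on the top-level context, which can't be a clone.
 * When called from perf_event_exit_task, it's OK because the
 * context has been detached from its task.
 */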
2379static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
2380{
2381 struct perf_event_context *ctx = event->ctx;
2382
2383 lockdep_assert_held(&ctx->mutex);
2384
2385
2386
2387
2388
2389
2390 raw_spin_lock_irq(&ctx->lock);
2391 if (!ctx->is_active) {
2392 __perf_remove_from_context(event, __get_cpu_context(ctx),
2393 ctx, (void *)flags);
2394 raw_spin_unlock_irq(&ctx->lock);
2395 return;
2396 }
2397 raw_spin_unlock_irq(&ctx->lock);
2398
2399 event_function_call(event, __perf_remove_from_context, (void *)flags);
2400}
2401
2402
2403
2404
2405static void __perf_event_disable(struct perf_event *event,
2406 struct perf_cpu_context *cpuctx,
2407 struct perf_event_context *ctx,
2408 void *info)
2409{
2410 if (event->state < PERF_EVENT_STATE_INACTIVE)
2411 return;
2412
2413 if (ctx->is_active & EVENT_TIME) {
2414 update_context_time(ctx);
2415 update_cgrp_time_from_event(event);
2416 }
2417
2418 if (event == event->group_leader)
2419 group_sched_out(event, cpuctx, ctx);
2420 else
2421 event_sched_out(event, cpuctx, ctx);
2422
2423 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2424 perf_cgroup_event_disable(event, ctx);
2425}
2426
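/*
 * Disable an event.
 *
 * If the event is already OFF (or in ERROR) this is a NOP; otherwise the
 * state change is performed via event_function_call() so it happens on the
 * CPU that owns the event's context, under ctx->lock with IRQs disabled.
 */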
2441static void _perf_event_disable(struct perf_event *event)
2442{
2443 struct perf_event_context *ctx = event->ctx;
2444
2445 raw_spin_lock_irq(&ctx->lock);
2446 if (event->state <= PERF_EVENT_STATE_OFF) {
2447 raw_spin_unlock_irq(&ctx->lock);
2448 return;
2449 }
2450 raw_spin_unlock_irq(&ctx->lock);
2451
2452 event_function_call(event, __perf_event_disable, NULL);
2453}
2454
2455void perf_event_disable_local(struct perf_event *event)
2456{
2457 event_function_local(event, __perf_event_disable, NULL);
2458}
2459
2460
2461
2462
2463
2464void perf_event_disable(struct perf_event *event)
2465{
2466 struct perf_event_context *ctx;
2467
2468 ctx = perf_event_ctx_lock(event);
2469 _perf_event_disable(event);
2470 perf_event_ctx_unlock(event, ctx);
2471}
2472EXPORT_SYMBOL_GPL(perf_event_disable);
2473
2474void perf_event_disable_inatomic(struct perf_event *event)
2475{
2476 WRITE_ONCE(event->pending_disable, smp_processor_id());
2477
2478 irq_work_queue(&event->pending);
2479}
2480
2481static void perf_set_shadow_time(struct perf_event *event,
2482 struct perf_event_context *ctx)
2483{
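	/*
	 * Called with IRQs disabled, so the event cannot be scheduled in or
	 * out underneath us.  Record the offset between the event's own
	 * timeline and its context's (for cgroup events: the per-CPU cgroup
	 * time) in event->shadow_ctx_time.
	 */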
2509 if (is_cgroup_event(event))
2510 perf_cgroup_set_shadow_time(event, event->tstamp);
2511 else
2512 event->shadow_ctx_time = event->tstamp - ctx->timestamp;
2513}
2514
2515#define MAX_INTERRUPTS (~0ULL)
2516
2517static void perf_log_throttle(struct perf_event *event, int enable);
2518static void perf_log_itrace_start(struct perf_event *event);
2519
2520static int
2521event_sched_in(struct perf_event *event,
2522 struct perf_cpu_context *cpuctx,
2523 struct perf_event_context *ctx)
2524{
2525 int ret = 0;
2526
2527 WARN_ON_ONCE(event->ctx != ctx);
2528
2529 lockdep_assert_held(&ctx->lock);
2530
2531 if (event->state <= PERF_EVENT_STATE_OFF)
2532 return 0;
2533
2534 WRITE_ONCE(event->oncpu, smp_processor_id());
2535
2536
2537
2538
2539
2540 smp_wmb();
2541 perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
2542
2543
2544
2545
2546
2547
2548 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
2549 perf_log_throttle(event, 1);
2550 event->hw.interrupts = 0;
2551 }
2552
2553 perf_pmu_disable(event->pmu);
2554
2555 perf_set_shadow_time(event, ctx);
2556
2557 perf_log_itrace_start(event);
2558
2559 if (event->pmu->add(event, PERF_EF_START)) {
2560 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2561 event->oncpu = -1;
2562 ret = -EAGAIN;
2563 goto out;
2564 }
2565
2566 if (!is_software_event(event))
2567 cpuctx->active_oncpu++;
2568 if (!ctx->nr_active++)
2569 perf_event_ctx_activate(ctx);
2570 if (event->attr.freq && event->attr.sample_freq)
2571 ctx->nr_freq++;
2572
2573 if (event->attr.exclusive)
2574 cpuctx->exclusive = 1;
2575
2576out:
2577 perf_pmu_enable(event->pmu);
2578
2579 return ret;
2580}
2581
2582static int
2583group_sched_in(struct perf_event *group_event,
2584 struct perf_cpu_context *cpuctx,
2585 struct perf_event_context *ctx)
2586{
2587 struct perf_event *event, *partial_group = NULL;
2588 struct pmu *pmu = ctx->pmu;
2589
2590 if (group_event->state == PERF_EVENT_STATE_OFF)
2591 return 0;
2592
2593 pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
2594
2595 if (event_sched_in(group_event, cpuctx, ctx))
2596 goto error;
2597
2598
2599
2600
2601 for_each_sibling_event(event, group_event) {
2602 if (event_sched_in(event, cpuctx, ctx)) {
2603 partial_group = event;
2604 goto group_error;
2605 }
2606 }
2607
2608 if (!pmu->commit_txn(pmu))
2609 return 0;
2610
2611group_error:
2612
2613
2614
2615
2616
2617 for_each_sibling_event(event, group_event) {
2618 if (event == partial_group)
2619 break;
2620
2621 event_sched_out(event, cpuctx, ctx);
2622 }
2623 event_sched_out(group_event, cpuctx, ctx);
2624
2625error:
2626 pmu->cancel_txn(pmu);
2627 return -EAGAIN;
2628}
2629
2630
2631
2632
2633static int group_can_go_on(struct perf_event *event,
2634 struct perf_cpu_context *cpuctx,
2635 int can_add_hw)
2636{
2637
2638
2639
2640 if (event->group_caps & PERF_EV_CAP_SOFTWARE)
2641 return 1;
2642
2643
2644
2645
2646 if (cpuctx->exclusive)
2647 return 0;
2648
2649
2650
2651
2652 if (event->attr.exclusive && !list_empty(get_event_list(event)))
2653 return 0;
2654
2655
2656
2657
2658 return can_add_hw;
2659}
2660
2661static void add_event_to_ctx(struct perf_event *event,
2662 struct perf_event_context *ctx)
2663{
2664 list_add_event(event, ctx);
2665 perf_group_attach(event);
2666}
2667
2668static void ctx_sched_out(struct perf_event_context *ctx,
2669 struct perf_cpu_context *cpuctx,
2670 enum event_type_t event_type);
2671static void
2672ctx_sched_in(struct perf_event_context *ctx,
2673 struct perf_cpu_context *cpuctx,
2674 enum event_type_t event_type,
2675 struct task_struct *task);
2676
2677static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
2678 struct perf_event_context *ctx,
2679 enum event_type_t event_type)
2680{
2681 if (!cpuctx->task_ctx)
2682 return;
2683
2684 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
2685 return;
2686
2687 ctx_sched_out(ctx, cpuctx, event_type);
2688}
2689
2690static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
2691 struct perf_event_context *ctx,
2692 struct task_struct *task)
2693{
2694 cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
2695 if (ctx)
2696 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
2697 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
2698 if (ctx)
2699 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
2700}
2701
/*
 * We want to maintain the following priority of scheduling:
 *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
 *  - task pinned (EVENT_PINNED)
 *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
 *  - task flexible (EVENT_FLEXIBLE)
 *
 * In order to avoid unscheduling and scheduling back in everything every
 * time an event is added, only do it for the groups of equal priority and
 * below.
 *
 * This can be called after a batch operation on task events, in which case
 * event_type is a bit mask of the types of events involved. For CPU events,
 * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
 */
2717static void ctx_resched(struct perf_cpu_context *cpuctx,
2718 struct perf_event_context *task_ctx,
2719 enum event_type_t event_type)
2720{
2721 enum event_type_t ctx_event_type;
2722 bool cpu_event = !!(event_type & EVENT_CPU);
2723
2724
2725
2726
2727
2728 if (event_type & EVENT_PINNED)
2729 event_type |= EVENT_FLEXIBLE;
2730
2731 ctx_event_type = event_type & EVENT_ALL;
2732
2733 perf_pmu_disable(cpuctx->ctx.pmu);
2734 if (task_ctx)
2735 task_ctx_sched_out(cpuctx, task_ctx, event_type);
2736
2737
2738
2739
2740
2741
2742
2743
2744 if (cpu_event)
2745 cpu_ctx_sched_out(cpuctx, ctx_event_type);
2746 else if (ctx_event_type & EVENT_PINNED)
2747 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2748
2749 perf_event_sched_in(cpuctx, task_ctx, current);
2750 perf_pmu_enable(cpuctx->ctx.pmu);
2751}
2752
2753void perf_pmu_resched(struct pmu *pmu)
2754{
2755 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
2756 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2757
2758 perf_ctx_lock(cpuctx, task_ctx);
2759 ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
2760 perf_ctx_unlock(cpuctx, task_ctx);
2761}
2762
2763
2764
2765
2766
2767
2768
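/*
 * Cross CPU call to install and enable a performance event.
 *
 * Unlike a plain event_function() call, this cannot assume that the task
 * context is already active on this CPU; it only reprograms the hardware
 * (via ctx_resched()) when it actually is.
 */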
2769static int __perf_install_in_context(void *info)
2770{
2771 struct perf_event *event = info;
2772 struct perf_event_context *ctx = event->ctx;
2773 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2774 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2775 bool reprogram = true;
2776 int ret = 0;
2777
2778 raw_spin_lock(&cpuctx->ctx.lock);
2779 if (ctx->task) {
2780 raw_spin_lock(&ctx->lock);
2781 task_ctx = ctx;
2782
2783 reprogram = (ctx->task == current);
2784
2785
2786
2787
2788
2789
2790
2791
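 /*
  * If the task is running, it must be running on this CPU; otherwise
  * we cannot reprogram things here. If it is not running, we don't
  * care: ctx->lock serializes against it becoming runnable.
  */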
2792 if (task_curr(ctx->task) && !reprogram) {
2793 ret = -ESRCH;
2794 goto unlock;
2795 }
2796
2797 WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
2798 } else if (task_ctx) {
2799 raw_spin_lock(&task_ctx->lock);
2800 }
2801
2802#ifdef CONFIG_CGROUP_PERF
2803 if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
2804
2805
2806
2807
2808 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
2809 reprogram = cgroup_is_descendant(cgrp->css.cgroup,
2810 event->cgrp->css.cgroup);
2811 }
2812#endif
2813
2814 if (reprogram) {
2815 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2816 add_event_to_ctx(event, ctx);
2817 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2818 } else {
2819 add_event_to_ctx(event, ctx);
2820 }
2821
2822unlock:
2823 perf_ctx_unlock(cpuctx, task_ctx);
2824
2825 return ret;
2826}
2827
2828static bool exclusive_event_installable(struct perf_event *event,
2829 struct perf_event_context *ctx);
2830
2831
2832
2833
2834
2835
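/*
 * Attach a performance event to a context.
 *
 * Must be called with ctx->mutex held; tolerates the task going away
 * (TASK_TOMBSTONE) and retries while the task is running.
 */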
2836static void
2837perf_install_in_context(struct perf_event_context *ctx,
2838 struct perf_event *event,
2839 int cpu)
2840{
2841 struct task_struct *task = READ_ONCE(ctx->task);
2842
2843 lockdep_assert_held(&ctx->mutex);
2844
2845 WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
2846
2847 if (event->cpu != -1)
2848 event->cpu = cpu;
2849
2850
2851
2852
2853
2854 smp_store_release(&event->ctx, ctx);
2855
2856
2857
2858
2859
2860
2861
2862
2863
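 /*
  * perf_event_attr::disabled events will not run and can be installed
  * without an IPI, except when this is the first event in the context,
  * in which case we need the IPI to set ctx->is_active. The IOC_ENABLE
  * that is sure to follow the creation of a disabled event will issue
  * the IPI and reprogram the hardware.
  */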
2864 if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
2865 raw_spin_lock_irq(&ctx->lock);
2866 if (ctx->task == TASK_TOMBSTONE) {
2867 raw_spin_unlock_irq(&ctx->lock);
2868 return;
2869 }
2870 add_event_to_ctx(event, ctx);
2871 raw_spin_unlock_irq(&ctx->lock);
2872 return;
2873 }
2874
2875 if (!task) {
2876 cpu_function_call(cpu, __perf_install_in_context, event);
2877 return;
2878 }
2879
2880
2881
2882
2883 if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
2884 return;
2885
 /*
  * Installing events is tricky because we cannot rely on ctx->is_active
  * to be set in case this is the nr_events 0 -> 1 transition.
  *
  * Instead we use task_curr(), which tells us if the task is running.
  * However, since we use task_curr() outside of rq::lock, we can race
  * against the actual state. This means the result can be wrong.
  *
  * If we get a false positive, we retry; this is harmless.
  *
  * If we get a false negative, things are complicated. If we are after
  * perf_event_context_sched_in(), ctx::lock will serialize us, and the
  * value must be correct. If we're before, it doesn't matter since
  * perf_event_context_sched_in() will program the counter.
  *
  * However, this hinges on the remote context switch having observed
  * our task->perf_event_ctxp[] store, such that it will in fact take
  * ctx::lock in perf_event_context_sched_in().
  */

 /*
  * This smp_mb() orders the task->perf_event_ctxp[] store with the
  * task_cpu() load, such that if the IPI then does not find the task
  * running, a future context switch of that task must observe the
  * store.
  */
2916 smp_mb();
2917again:
2918 if (!task_function_call(task, __perf_install_in_context, event))
2919 return;
2920
2921 raw_spin_lock_irq(&ctx->lock);
2922 task = ctx->task;
2923 if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
2924
2925
2926
2927
2928
2929 raw_spin_unlock_irq(&ctx->lock);
2930 return;
2931 }
2932
2933
2934
2935
2936 if (task_curr(task)) {
2937 raw_spin_unlock_irq(&ctx->lock);
2938 goto again;
2939 }
2940 add_event_to_ctx(event, ctx);
2941 raw_spin_unlock_irq(&ctx->lock);
2942}
2943
2944
2945
2946
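/*
 * Cross CPU call to enable a performance event.
 */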
2947static void __perf_event_enable(struct perf_event *event,
2948 struct perf_cpu_context *cpuctx,
2949 struct perf_event_context *ctx,
2950 void *info)
2951{
2952 struct perf_event *leader = event->group_leader;
2953 struct perf_event_context *task_ctx;
2954
2955 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2956 event->state <= PERF_EVENT_STATE_ERROR)
2957 return;
2958
2959 if (ctx->is_active)
2960 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2961
2962 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2963 perf_cgroup_event_enable(event, ctx);
2964
2965 if (!ctx->is_active)
2966 return;
2967
2968 if (!event_filter_match(event)) {
2969 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2970 return;
2971 }
2972
2973
2974
2975
2976
2977 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
2978 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2979 return;
2980 }
2981
2982 task_ctx = cpuctx->task_ctx;
2983 if (ctx->task)
2984 WARN_ON_ONCE(task_ctx != ctx);
2985
2986 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2987}
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
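/*
 * Enable an event.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid; this holds when called through
 * perf_event_for_each_child() or perf_event_for_each().
 */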
2998static void _perf_event_enable(struct perf_event *event)
2999{
3000 struct perf_event_context *ctx = event->ctx;
3001
3002 raw_spin_lock_irq(&ctx->lock);
3003 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
3004 event->state < PERF_EVENT_STATE_ERROR) {
3005out:
3006 raw_spin_unlock_irq(&ctx->lock);
3007 return;
3008 }
3009
3010
3011
3012
3013
3014
3015
3016
3017 if (event->state == PERF_EVENT_STATE_ERROR) {
3018
3019
3020
3021 if (event->event_caps & PERF_EV_CAP_SIBLING &&
3022 event->group_leader == event)
3023 goto out;
3024
3025 event->state = PERF_EVENT_STATE_OFF;
3026 }
3027 raw_spin_unlock_irq(&ctx->lock);
3028
3029 event_function_call(event, __perf_event_enable, NULL);
3030}
3031
3032
3033
3034
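/*
 * Locked wrapper around _perf_event_enable(); see perf_event_disable()
 * for the counterpart.
 */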
3035void perf_event_enable(struct perf_event *event)
3036{
3037 struct perf_event_context *ctx;
3038
3039 ctx = perf_event_ctx_lock(event);
3040 _perf_event_enable(event);
3041 perf_event_ctx_unlock(event, ctx);
3042}
3043EXPORT_SYMBOL_GPL(perf_event_enable);
3044
3045struct stop_event_data {
3046 struct perf_event *event;
3047 unsigned int restart;
3048};
3049
3050static int __perf_event_stop(void *info)
3051{
3052 struct stop_event_data *sd = info;
3053 struct perf_event *event = sd->event;
3054
3055
3056 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
3057 return 0;
3058
3059
3060 smp_rmb();
3061
3062
3063
3064
3065
3066 if (READ_ONCE(event->oncpu) != smp_processor_id())
3067 return -EAGAIN;
3068
3069 event->pmu->stop(event, PERF_EF_UPDATE);
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080 if (sd->restart)
3081 event->pmu->start(event, 0);
3082
3083 return 0;
3084}
3085
3086static int perf_event_stop(struct perf_event *event, int restart)
3087{
3088 struct stop_event_data sd = {
3089 .event = event,
3090 .restart = restart,
3091 };
3092 int ret = 0;
3093
3094 do {
3095 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
3096 return 0;
3097
3098
3099 smp_rmb();
3100
3101
3102
3103
3104
3105
3106 ret = cpu_function_call(READ_ONCE(event->oncpu),
3107 __perf_event_stop, &sd);
3108 } while (ret == -EAGAIN);
3109
3110 return ret;
3111}
3112
/*
 * Address range filter configuration is a two part process:
 *
 * (p1) when userspace mappings change (a new mmap(), exec, or a new
 *      SET_FILTER ioctl), the offsets of the corresponding filters are
 *      updated and event::addr_filters_gen is bumped;
 * (p2) when the event is scheduled in (pmu::add), it calls
 *      perf_event_addr_filters_sync(), which calls pmu::addr_filters_sync()
 *      if the generation has changed since the previous call.
 *
 * If (p1) happens while the event is active, the event is restarted to
 * force (p2).
 */
3135void perf_event_addr_filters_sync(struct perf_event *event)
3136{
3137 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
3138
3139 if (!has_addr_filter(event))
3140 return;
3141
3142 raw_spin_lock(&ifh->lock);
3143 if (event->addr_filters_gen != event->hw.addr_filters_gen) {
3144 event->pmu->addr_filters_sync(event);
3145 event->hw.addr_filters_gen = event->addr_filters_gen;
3146 }
3147 raw_spin_unlock(&ifh->lock);
3148}
3149EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
3150
3151static int _perf_event_refresh(struct perf_event *event, int refresh)
3152{
3153
3154
3155
3156 if (event->attr.inherit || !is_sampling_event(event))
3157 return -EINVAL;
3158
3159 atomic_add(refresh, &event->event_limit);
3160 _perf_event_enable(event);
3161
3162 return 0;
3163}
3164
3165
3166
3167
3168int perf_event_refresh(struct perf_event *event, int refresh)
3169{
3170 struct perf_event_context *ctx;
3171 int ret;
3172
3173 ctx = perf_event_ctx_lock(event);
3174 ret = _perf_event_refresh(event, refresh);
3175 perf_event_ctx_unlock(event, ctx);
3176
3177 return ret;
3178}
3179EXPORT_SYMBOL_GPL(perf_event_refresh);
3180
3181static int perf_event_modify_breakpoint(struct perf_event *bp,
3182 struct perf_event_attr *attr)
3183{
3184 int err;
3185
3186 _perf_event_disable(bp);
3187
3188 err = modify_user_hw_breakpoint_check(bp, attr, true);
3189
3190 if (!bp->attr.disabled)
3191 _perf_event_enable(bp);
3192
3193 return err;
3194}
3195
3196static int perf_event_modify_attr(struct perf_event *event,
3197 struct perf_event_attr *attr)
3198{
3199 int (*func)(struct perf_event *, struct perf_event_attr *);
3200 struct perf_event *child;
3201 int err;
3202
3203 if (event->attr.type != attr->type)
3204 return -EINVAL;
3205
3206 switch (event->attr.type) {
3207 case PERF_TYPE_BREAKPOINT:
3208 func = perf_event_modify_breakpoint;
3209 break;
3210 default:
3211
3212 return -EOPNOTSUPP;
3213 }
3214
3215 WARN_ON_ONCE(event->ctx->parent_ctx);
3216
3217 mutex_lock(&event->child_mutex);
3218 err = func(event, attr);
3219 if (err)
3220 goto out;
3221 list_for_each_entry(child, &event->child_list, child_list) {
3222 err = func(child, attr);
3223 if (err)
3224 goto out;
3225 }
3226out:
3227 mutex_unlock(&event->child_mutex);
3228 return err;
3229}
3230
3231static void ctx_sched_out(struct perf_event_context *ctx,
3232 struct perf_cpu_context *cpuctx,
3233 enum event_type_t event_type)
3234{
3235 struct perf_event *event, *tmp;
3236 int is_active = ctx->is_active;
3237
3238 lockdep_assert_held(&ctx->lock);
3239
3240 if (likely(!ctx->nr_events)) {
3241
3242
3243
3244 WARN_ON_ONCE(ctx->is_active);
3245 if (ctx->task)
3246 WARN_ON_ONCE(cpuctx->task_ctx);
3247 return;
3248 }
3249
3250 ctx->is_active &= ~event_type;
3251 if (!(ctx->is_active & EVENT_ALL))
3252 ctx->is_active = 0;
3253
3254 if (ctx->task) {
3255 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3256 if (!ctx->is_active)
3257 cpuctx->task_ctx = NULL;
3258 }
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270 if (is_active & EVENT_TIME) {
3271
3272 update_context_time(ctx);
3273 update_cgrp_time_from_cpuctx(cpuctx);
3274 }
3275
3276 is_active ^= ctx->is_active;
3277
3278 if (!ctx->nr_active || !(is_active & EVENT_ALL))
3279 return;
3280
3281 perf_pmu_disable(ctx->pmu);
3282 if (is_active & EVENT_PINNED) {
3283 list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
3284 group_sched_out(event, cpuctx, ctx);
3285 }
3286
3287 if (is_active & EVENT_FLEXIBLE) {
3288 list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
3289 group_sched_out(event, cpuctx, ctx);
3290
3291
3292
3293
3294
3295
3296 ctx->rotate_necessary = 0;
3297 }
3298 perf_pmu_enable(ctx->pmu);
3299}
3300
3301
3302
3303
3304
3305
3306
3307
3308
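/*
 * Test whether two contexts are equivalent, i.e. whether they have both
 * been cloned from the same version of the same context.
 *
 * Equivalence is measured using a generation number in the context that
 * is incremented on each modification to it.
 */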
3309static int context_equiv(struct perf_event_context *ctx1,
3310 struct perf_event_context *ctx2)
3311{
3312 lockdep_assert_held(&ctx1->lock);
3313 lockdep_assert_held(&ctx2->lock);
3314
3315
3316 if (ctx1->pin_count || ctx2->pin_count)
3317 return 0;
3318
3319
3320 if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
3321 return 1;
3322
3323
3324 if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
3325 return 1;
3326
3327
3328
3329
3330
3331 if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
3332 ctx1->parent_gen == ctx2->parent_gen)
3333 return 1;
3334
3335
3336 return 0;
3337}
3338
3339static void __perf_event_sync_stat(struct perf_event *event,
3340 struct perf_event *next_event)
3341{
3342 u64 value;
3343
3344 if (!event->attr.inherit_stat)
3345 return;
3346
3347
3348
3349
3350
3351
3352
3353
3354 if (event->state == PERF_EVENT_STATE_ACTIVE)
3355 event->pmu->read(event);
3356
3357 perf_event_update_time(event);
3358
3359
3360
3361
3362
3363 value = local64_read(&next_event->count);
3364 value = local64_xchg(&event->count, value);
3365 local64_set(&next_event->count, value);
3366
3367 swap(event->total_time_enabled, next_event->total_time_enabled);
3368 swap(event->total_time_running, next_event->total_time_running);
3369
3370
3371
3372
3373 perf_event_update_userpage(event);
3374 perf_event_update_userpage(next_event);
3375}
3376
3377static void perf_event_sync_stat(struct perf_event_context *ctx,
3378 struct perf_event_context *next_ctx)
3379{
3380 struct perf_event *event, *next_event;
3381
3382 if (!ctx->nr_stat)
3383 return;
3384
3385 update_context_time(ctx);
3386
3387 event = list_first_entry(&ctx->event_list,
3388 struct perf_event, event_entry);
3389
3390 next_event = list_first_entry(&next_ctx->event_list,
3391 struct perf_event, event_entry);
3392
3393 while (&event->event_entry != &ctx->event_list &&
3394 &next_event->event_entry != &next_ctx->event_list) {
3395
3396 __perf_event_sync_stat(event, next_event);
3397
3398 event = list_next_entry(event, event_entry);
3399 next_event = list_next_entry(next_event, event_entry);
3400 }
3401}
3402
3403static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
3404 struct task_struct *next)
3405{
3406 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
3407 struct perf_event_context *next_ctx;
3408 struct perf_event_context *parent, *next_parent;
3409 struct perf_cpu_context *cpuctx;
3410 int do_switch = 1;
3411 struct pmu *pmu;
3412
3413 if (likely(!ctx))
3414 return;
3415
3416 pmu = ctx->pmu;
3417 cpuctx = __get_cpu_context(ctx);
3418 if (!cpuctx->task_ctx)
3419 return;
3420
3421 rcu_read_lock();
3422 next_ctx = next->perf_event_ctxp[ctxn];
3423 if (!next_ctx)
3424 goto unlock;
3425
3426 parent = rcu_dereference(ctx->parent_ctx);
3427 next_parent = rcu_dereference(next_ctx->parent_ctx);
3428
3429
3430 if (!parent && !next_parent)
3431 goto unlock;
3432
3433 if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443 raw_spin_lock(&ctx->lock);
3444 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
3445 if (context_equiv(ctx, next_ctx)) {
3446
3447 WRITE_ONCE(ctx->task, next);
3448 WRITE_ONCE(next_ctx->task, task);
3449
3450 perf_pmu_disable(pmu);
3451
3452 if (cpuctx->sched_cb_usage && pmu->sched_task)
3453 pmu->sched_task(ctx, false);
3454
3455
3456
3457
3458
3459
3460
3461 if (pmu->swap_task_ctx)
3462 pmu->swap_task_ctx(ctx, next_ctx);
3463 else
3464 swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
3465
3466 perf_pmu_enable(pmu);
3467
3468
3469
3470
3471
3472
3473
3474
3475 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
3476 RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
3477
3478 do_switch = 0;
3479
3480 perf_event_sync_stat(ctx, next_ctx);
3481 }
3482 raw_spin_unlock(&next_ctx->lock);
3483 raw_spin_unlock(&ctx->lock);
3484 }
3485unlock:
3486 rcu_read_unlock();
3487
3488 if (do_switch) {
3489 raw_spin_lock(&ctx->lock);
3490 perf_pmu_disable(pmu);
3491
3492 if (cpuctx->sched_cb_usage && pmu->sched_task)
3493 pmu->sched_task(ctx, false);
3494 task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
3495
3496 perf_pmu_enable(pmu);
3497 raw_spin_unlock(&ctx->lock);
3498 }
3499}
3500
3501static DEFINE_PER_CPU(struct list_head, sched_cb_list);
3502
3503void perf_sched_cb_dec(struct pmu *pmu)
3504{
3505 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3506
3507 this_cpu_dec(perf_sched_cb_usages);
3508
3509 if (!--cpuctx->sched_cb_usage)
3510 list_del(&cpuctx->sched_cb_entry);
3511}
3512
3513
3514void perf_sched_cb_inc(struct pmu *pmu)
3515{
3516 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3517
3518 if (!cpuctx->sched_cb_usage++)
3519 list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
3520
3521 this_cpu_inc(perf_sched_cb_usages);
3522}
3523
3524
3525
3526
3527
3528
3529
3530
3531
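/*
 * This function provides the context switch callback to the lower code
 * layer. It is invoked ONLY when the context switch callback is enabled,
 * i.e. when at least one PMU on this CPU has bumped sched_cb_usage
 * (e.g. to flush hardware buffers across a task switch).
 */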
3532static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
3533{
3534 struct pmu *pmu;
3535
3536 pmu = cpuctx->ctx.pmu;
3537
3538 if (WARN_ON_ONCE(!pmu->sched_task))
3539 return;
3540
3541 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
3542 perf_pmu_disable(pmu);
3543
3544 pmu->sched_task(cpuctx->task_ctx, sched_in);
3545
3546 perf_pmu_enable(pmu);
3547 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
3548}
3549
3550static void perf_pmu_sched_task(struct task_struct *prev,
3551 struct task_struct *next,
3552 bool sched_in)
3553{
3554 struct perf_cpu_context *cpuctx;
3555
3556 if (prev == next)
3557 return;
3558
3559 list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
3560
3561 if (cpuctx->task_ctx)
3562 continue;
3563
3564 __perf_pmu_sched_task(cpuctx, sched_in);
3565 }
3566}
3567
3568static void perf_event_switch(struct task_struct *task,
3569 struct task_struct *next_prev, bool sched_in);
3570
3571#define for_each_task_context_nr(ctxn) \
3572 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
3573
/*
 * Called from scheduler to remove the events of the current task,
 * with interrupts disabled.
 *
 * We stop each event and update the event value in event->count.
 *
 * This does not protect us against NMI, but disable()
 * sets the disabled bit in the control field of event _before_
 * we remove the event from the context list.
 */
3585void __perf_event_task_sched_out(struct task_struct *task,
3586 struct task_struct *next)
3587{
3588 int ctxn;
3589
3590 if (__this_cpu_read(perf_sched_cb_usages))
3591 perf_pmu_sched_task(task, next, false);
3592
3593 if (atomic_read(&nr_switch_events))
3594 perf_event_switch(task, next, false);
3595
3596 for_each_task_context_nr(ctxn)
3597 perf_event_context_sched_out(task, ctxn, next);
3598
3599
3600
3601
3602
3603
3604 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3605 perf_cgroup_sched_out(task, next);
3606}
3607
3608
3609
3610
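/*
 * Called with IRQs disabled.
 */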
3611static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
3612 enum event_type_t event_type)
3613{
3614 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
3615}
3616
3617static bool perf_less_group_idx(const void *l, const void *r)
3618{
3619 const struct perf_event *le = *(const struct perf_event **)l;
3620 const struct perf_event *re = *(const struct perf_event **)r;
3621
3622 return le->group_index < re->group_index;
3623}
3624
3625static void swap_ptr(void *l, void *r)
3626{
3627 void **lp = l, **rp = r;
3628
3629 swap(*lp, *rp);
3630}
3631
3632static const struct min_heap_callbacks perf_min_heap = {
3633 .elem_size = sizeof(struct perf_event *),
3634 .less = perf_less_group_idx,
3635 .swp = swap_ptr,
3636};
3637
3638static void __heap_add(struct min_heap *heap, struct perf_event *event)
3639{
3640 struct perf_event **itrs = heap->data;
3641
3642 if (event) {
3643 itrs[heap->nr] = event;
3644 heap->nr++;
3645 }
3646}
3647
3648static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
3649 struct perf_event_groups *groups, int cpu,
3650 int (*func)(struct perf_event *, void *),
3651 void *data)
3652{
3653#ifdef CONFIG_CGROUP_PERF
3654 struct cgroup_subsys_state *css = NULL;
3655#endif
3656
3657 struct perf_event *itrs[2];
3658 struct min_heap event_heap;
3659 struct perf_event **evt;
3660 int ret;
3661
3662 if (cpuctx) {
3663 event_heap = (struct min_heap){
3664 .data = cpuctx->heap,
3665 .nr = 0,
3666 .size = cpuctx->heap_size,
3667 };
3668
3669 lockdep_assert_held(&cpuctx->ctx.lock);
3670
3671#ifdef CONFIG_CGROUP_PERF
3672 if (cpuctx->cgrp)
3673 css = &cpuctx->cgrp->css;
3674#endif
3675 } else {
3676 event_heap = (struct min_heap){
3677 .data = itrs,
3678 .nr = 0,
3679 .size = ARRAY_SIZE(itrs),
3680 };
3681
3682 __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
3683 }
3684 evt = event_heap.data;
3685
3686 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
3687
3688#ifdef CONFIG_CGROUP_PERF
3689 for (; css; css = css->parent)
3690 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
3691#endif
3692
3693 min_heapify_all(&event_heap, &perf_min_heap);
3694
3695 while (event_heap.nr) {
3696 ret = func(*evt, data);
3697 if (ret)
3698 return ret;
3699
3700 *evt = perf_event_groups_next(*evt);
3701 if (*evt)
3702 min_heapify(&event_heap, 0, &perf_min_heap);
3703 else
3704 min_heap_pop(&event_heap, &perf_min_heap);
3705 }
3706
3707 return 0;
3708}
3709
3710static int merge_sched_in(struct perf_event *event, void *data)
3711{
3712 struct perf_event_context *ctx = event->ctx;
3713 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
3714 int *can_add_hw = data;
3715
3716 if (event->state <= PERF_EVENT_STATE_OFF)
3717 return 0;
3718
3719 if (!event_filter_match(event))
3720 return 0;
3721
3722 if (group_can_go_on(event, cpuctx, *can_add_hw)) {
3723 if (!group_sched_in(event, cpuctx, ctx))
3724 list_add_tail(&event->active_list, get_event_list(event));
3725 }
3726
3727 if (event->state == PERF_EVENT_STATE_INACTIVE) {
3728 if (event->attr.pinned) {
3729 perf_cgroup_event_disable(event, ctx);
3730 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
3731 }
3732
3733 *can_add_hw = 0;
3734 ctx->rotate_necessary = 1;
3735 perf_mux_hrtimer_restart(cpuctx);
3736 }
3737
3738 return 0;
3739}
3740
3741static void
3742ctx_pinned_sched_in(struct perf_event_context *ctx,
3743 struct perf_cpu_context *cpuctx)
3744{
3745 int can_add_hw = 1;
3746
3747 if (ctx != &cpuctx->ctx)
3748 cpuctx = NULL;
3749
3750 visit_groups_merge(cpuctx, &ctx->pinned_groups,
3751 smp_processor_id(),
3752 merge_sched_in, &can_add_hw);
3753}
3754
3755static void
3756ctx_flexible_sched_in(struct perf_event_context *ctx,
3757 struct perf_cpu_context *cpuctx)
3758{
3759 int can_add_hw = 1;
3760
3761 if (ctx != &cpuctx->ctx)
3762 cpuctx = NULL;
3763
3764 visit_groups_merge(cpuctx, &ctx->flexible_groups,
3765 smp_processor_id(),
3766 merge_sched_in, &can_add_hw);
3767}
3768
3769static void
3770ctx_sched_in(struct perf_event_context *ctx,
3771 struct perf_cpu_context *cpuctx,
3772 enum event_type_t event_type,
3773 struct task_struct *task)
3774{
3775 int is_active = ctx->is_active;
3776 u64 now;
3777
3778 lockdep_assert_held(&ctx->lock);
3779
3780 if (likely(!ctx->nr_events))
3781 return;
3782
3783 ctx->is_active |= (event_type | EVENT_TIME);
3784 if (ctx->task) {
3785 if (!is_active)
3786 cpuctx->task_ctx = ctx;
3787 else
3788 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3789 }
3790
3791 is_active ^= ctx->is_active;
3792
3793 if (is_active & EVENT_TIME) {
3794
3795 now = perf_clock();
3796 ctx->timestamp = now;
3797 perf_cgroup_set_timestamp(task, ctx);
3798 }
3799
3800
3801
3802
3803
3804 if (is_active & EVENT_PINNED)
3805 ctx_pinned_sched_in(ctx, cpuctx);
3806
3807
3808 if (is_active & EVENT_FLEXIBLE)
3809 ctx_flexible_sched_in(ctx, cpuctx);
3810}
3811
3812static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
3813 enum event_type_t event_type,
3814 struct task_struct *task)
3815{
3816 struct perf_event_context *ctx = &cpuctx->ctx;
3817
3818 ctx_sched_in(ctx, cpuctx, event_type, task);
3819}
3820
3821static void perf_event_context_sched_in(struct perf_event_context *ctx,
3822 struct task_struct *task)
3823{
3824 struct perf_cpu_context *cpuctx;
3825 struct pmu *pmu;
3826
3827 cpuctx = __get_cpu_context(ctx);
3828
3829
3830
3831
3832
3833 pmu = ctx->pmu = cpuctx->ctx.pmu;
3834
3835 if (cpuctx->task_ctx == ctx) {
3836 if (cpuctx->sched_cb_usage)
3837 __perf_pmu_sched_task(cpuctx, true);
3838 return;
3839 }
3840
3841 perf_ctx_lock(cpuctx, ctx);
3842
3843
3844
3845
3846 if (!ctx->nr_events)
3847 goto unlock;
3848
3849 perf_pmu_disable(pmu);
3850
3851
3852
3853
3854
3855
3856
3857
3858 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
3859 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
3860 perf_event_sched_in(cpuctx, ctx, task);
3861
3862 if (cpuctx->sched_cb_usage && pmu->sched_task)
3863 pmu->sched_task(cpuctx->task_ctx, true);
3864
3865 perf_pmu_enable(pmu);
3866
3867unlock:
3868 perf_ctx_unlock(cpuctx, ctx);
3869}
3870
/*
 * Called from scheduler to add the events of the current task
 * with interrupts disabled.
 *
 * We restore the event value and then enable it.
 *
 * This does not protect us against NMI, but enable()
 * sets the enabled bit in the control field of event _before_
 * we write to the event, so if it is enabled _before_
 * we read it, it's valid. The irq latency is however quite small
 * compared to the delay of the original mmap time.
 */
3882void __perf_event_task_sched_in(struct task_struct *prev,
3883 struct task_struct *task)
3884{
3885 struct perf_event_context *ctx;
3886 int ctxn;
3887
3888
3889
3890
3891
3892
3893
3894
3895 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3896 perf_cgroup_sched_in(prev, task);
3897
3898 for_each_task_context_nr(ctxn) {
3899 ctx = task->perf_event_ctxp[ctxn];
3900 if (likely(!ctx))
3901 continue;
3902
3903 perf_event_context_sched_in(ctx, task);
3904 }
3905
3906 if (atomic_read(&nr_switch_events))
3907 perf_event_switch(task, prev, true);
3908
3909 if (__this_cpu_read(perf_sched_cb_usages))
3910 perf_pmu_sched_task(prev, task, true);
3911}
3912
3913static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
3914{
3915 u64 frequency = event->attr.sample_freq;
3916 u64 sec = NSEC_PER_SEC;
3917 u64 divisor, dividend;
3918
3919 int count_fls, nsec_fls, frequency_fls, sec_fls;
3920
3921 count_fls = fls64(count);
3922 nsec_fls = fls64(nsec);
3923 frequency_fls = fls64(frequency);
3924 sec_fls = 30;
3925
 /*
  * We saw @count events in @nsec nanoseconds, and want to sample at
  * @frequency Hz, so the target period (events per sample) is:
  *
  *             @count * NSEC_PER_SEC
  * period = -------------------------
  *             @nsec * @frequency
  *
  * To keep the intermediate products within 64 bits we shift the
  * operands down (tracked via their fls()) before multiplying, trading
  * a little precision in the low bits for not overflowing.
  */
3940#define REDUCE_FLS(a, b) \
3941do { \
3942 if (a##_fls > b##_fls) { \
3943 a >>= 1; \
3944 a##_fls--; \
3945 } else { \
3946 b >>= 1; \
3947 b##_fls--; \
3948 } \
3949} while (0)
3950
3951
3952
3953
3954
3955 while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
3956 REDUCE_FLS(nsec, frequency);
3957 REDUCE_FLS(sec, count);
3958 }
3959
3960 if (count_fls + sec_fls > 64) {
3961 divisor = nsec * frequency;
3962
3963 while (count_fls + sec_fls > 64) {
3964 REDUCE_FLS(count, sec);
3965 divisor >>= 1;
3966 }
3967
3968 dividend = count * sec;
3969 } else {
3970 dividend = count * sec;
3971
3972 while (nsec_fls + frequency_fls > 64) {
3973 REDUCE_FLS(nsec, frequency);
3974 dividend >>= 1;
3975 }
3976
3977 divisor = nsec * frequency;
3978 }
3979
3980 if (!divisor)
3981 return dividend;
3982
3983 return div64_u64(dividend, divisor);
3984}
3985
3986static DEFINE_PER_CPU(int, perf_throttled_count);
3987static DEFINE_PER_CPU(u64, perf_throttled_seq);
3988
3989static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
3990{
3991 struct hw_perf_event *hwc = &event->hw;
3992 s64 period, sample_period;
3993 s64 delta;
3994
3995 period = perf_calculate_period(event, nsec, count);
3996
3997 delta = (s64)(period - hwc->sample_period);
3998 delta = (delta + 7) / 8;
3999
4000 sample_period = hwc->sample_period + delta;
4001
4002 if (!sample_period)
4003 sample_period = 1;
4004
4005 hwc->sample_period = sample_period;
4006
4007 if (local64_read(&hwc->period_left) > 8*sample_period) {
4008 if (disable)
4009 event->pmu->stop(event, PERF_EF_UPDATE);
4010
4011 local64_set(&hwc->period_left, 0);
4012
4013 if (disable)
4014 event->pmu->start(event, PERF_EF_RELOAD);
4015 }
4016}
4017
4018
4019
4020
4021
4022
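/*
 * Combine frequency adjustment with unthrottling to avoid two passes over
 * the events. At the same time, make sure having freq events does not
 * change the rate of unthrottling, as that would introduce bias.
 */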
4023static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
4024 int needs_unthr)
4025{
4026 struct perf_event *event;
4027 struct hw_perf_event *hwc;
4028 u64 now, period = TICK_NSEC;
4029 s64 delta;
4030
4031
4032
4033
4034
4035
4036 if (!(ctx->nr_freq || needs_unthr))
4037 return;
4038
4039 raw_spin_lock(&ctx->lock);
4040 perf_pmu_disable(ctx->pmu);
4041
4042 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4043 if (event->state != PERF_EVENT_STATE_ACTIVE)
4044 continue;
4045
4046 if (!event_filter_match(event))
4047 continue;
4048
4049 perf_pmu_disable(event->pmu);
4050
4051 hwc = &event->hw;
4052
4053 if (hwc->interrupts == MAX_INTERRUPTS) {
4054 hwc->interrupts = 0;
4055 perf_log_throttle(event, 1);
4056 event->pmu->start(event, 0);
4057 }
4058
4059 if (!event->attr.freq || !event->attr.sample_freq)
4060 goto next;
4061
4062
4063
4064
4065 event->pmu->stop(event, PERF_EF_UPDATE);
4066
4067 now = local64_read(&event->count);
4068 delta = now - hwc->freq_count_stamp;
4069 hwc->freq_count_stamp = now;
4070
4071
4072
4073
4074
4075
4076
4077
4078 if (delta > 0)
4079 perf_adjust_period(event, period, delta, false);
4080
4081 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
4082 next:
4083 perf_pmu_enable(event->pmu);
4084 }
4085
4086 perf_pmu_enable(ctx->pmu);
4087 raw_spin_unlock(&ctx->lock);
4088}
4089
4090
4091
4092
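/*
 * Move @event to the tail of the @ctx's eligible events.
 */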
4093static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
4094{
4095
4096
4097
4098
4099 if (ctx->rotate_disable)
4100 return;
4101
4102 perf_event_groups_delete(&ctx->flexible_groups, event);
4103 perf_event_groups_insert(&ctx->flexible_groups, event);
4104}
4105
4106
4107static inline struct perf_event *
4108ctx_event_to_rotate(struct perf_event_context *ctx)
4109{
4110 struct perf_event *event;
4111
4112
4113 event = list_first_entry_or_null(&ctx->flexible_active,
4114 struct perf_event, active_list);
4115
4116
4117 if (!event) {
4118 event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
4119 typeof(*event), group_node);
4120 }
4121
4122
4123
4124
4125
4126 ctx->rotate_necessary = 0;
4127
4128 return event;
4129}
4130
4131static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
4132{
4133 struct perf_event *cpu_event = NULL, *task_event = NULL;
4134 struct perf_event_context *task_ctx = NULL;
4135 int cpu_rotate, task_rotate;
4136
4137
4138
4139
4140
4141
4142 cpu_rotate = cpuctx->ctx.rotate_necessary;
4143 task_ctx = cpuctx->task_ctx;
4144 task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
4145
4146 if (!(cpu_rotate || task_rotate))
4147 return false;
4148
4149 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
4150 perf_pmu_disable(cpuctx->ctx.pmu);
4151
4152 if (task_rotate)
4153 task_event = ctx_event_to_rotate(task_ctx);
4154 if (cpu_rotate)
4155 cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
4156
4157
4158
4159
4160
4161 if (task_event || (task_ctx && cpu_event))
4162 ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
4163 if (cpu_event)
4164 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
4165
4166 if (task_event)
4167 rotate_ctx(task_ctx, task_event);
4168 if (cpu_event)
4169 rotate_ctx(&cpuctx->ctx, cpu_event);
4170
4171 perf_event_sched_in(cpuctx, task_ctx, current);
4172
4173 perf_pmu_enable(cpuctx->ctx.pmu);
4174 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
4175
4176 return true;
4177}
4178
4179void perf_event_task_tick(void)
4180{
4181 struct list_head *head = this_cpu_ptr(&active_ctx_list);
4182 struct perf_event_context *ctx, *tmp;
4183 int throttled;
4184
4185 lockdep_assert_irqs_disabled();
4186
4187 __this_cpu_inc(perf_throttled_seq);
4188 throttled = __this_cpu_xchg(perf_throttled_count, 0);
4189 tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
4190
4191 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
4192 perf_adjust_freq_unthr_context(ctx, throttled);
4193}
4194
4195static int event_enable_on_exec(struct perf_event *event,
4196 struct perf_event_context *ctx)
4197{
4198 if (!event->attr.enable_on_exec)
4199 return 0;
4200
4201 event->attr.enable_on_exec = 0;
4202 if (event->state >= PERF_EVENT_STATE_INACTIVE)
4203 return 0;
4204
4205 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
4206
4207 return 1;
4208}
4209
4210
4211
4212
4213
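/*
 * Enable all of a task's events that have been marked enable-on-exec.
 * This expects task == current.
 */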
4214static void perf_event_enable_on_exec(int ctxn)
4215{
4216 struct perf_event_context *ctx, *clone_ctx = NULL;
4217 enum event_type_t event_type = 0;
4218 struct perf_cpu_context *cpuctx;
4219 struct perf_event *event;
4220 unsigned long flags;
4221 int enabled = 0;
4222
4223 local_irq_save(flags);
4224 ctx = current->perf_event_ctxp[ctxn];
4225 if (!ctx || !ctx->nr_events)
4226 goto out;
4227
4228 cpuctx = __get_cpu_context(ctx);
4229 perf_ctx_lock(cpuctx, ctx);
4230 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
4231 list_for_each_entry(event, &ctx->event_list, event_entry) {
4232 enabled |= event_enable_on_exec(event, ctx);
4233 event_type |= get_event_type(event);
4234 }
4235
4236
4237
4238
4239 if (enabled) {
4240 clone_ctx = unclone_ctx(ctx);
4241 ctx_resched(cpuctx, ctx, event_type);
4242 } else {
4243 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
4244 }
4245 perf_ctx_unlock(cpuctx, ctx);
4246
4247out:
4248 local_irq_restore(flags);
4249
4250 if (clone_ctx)
4251 put_ctx(clone_ctx);
4252}
4253
4254static void perf_remove_from_owner(struct perf_event *event);
4255static void perf_event_exit_event(struct perf_event *event,
4256 struct perf_event_context *ctx);
4257
4258
4259
4260
4261
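/*
 * Remove all events from the current task that have been marked
 * remove-on-exec.
 */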
4262static void perf_event_remove_on_exec(int ctxn)
4263{
4264 struct perf_event_context *ctx, *clone_ctx = NULL;
4265 struct perf_event *event, *next;
4266 LIST_HEAD(free_list);
4267 unsigned long flags;
4268 bool modified = false;
4269
4270 ctx = perf_pin_task_context(current, ctxn);
4271 if (!ctx)
4272 return;
4273
4274 mutex_lock(&ctx->mutex);
4275
4276 if (WARN_ON_ONCE(ctx->task != current))
4277 goto unlock;
4278
4279 list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
4280 if (!event->attr.remove_on_exec)
4281 continue;
4282
4283 if (!is_kernel_event(event))
4284 perf_remove_from_owner(event);
4285
4286 modified = true;
4287
4288 perf_event_exit_event(event, ctx);
4289 }
4290
4291 raw_spin_lock_irqsave(&ctx->lock, flags);
4292 if (modified)
4293 clone_ctx = unclone_ctx(ctx);
4294 --ctx->pin_count;
4295 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4296
4297unlock:
4298 mutex_unlock(&ctx->mutex);
4299
4300 put_ctx(ctx);
4301 if (clone_ctx)
4302 put_ctx(clone_ctx);
4303}
4304
4305struct perf_read_data {
4306 struct perf_event *event;
4307 bool group;
4308 int ret;
4309};
4310
4311static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
4312{
4313 u16 local_pkg, event_pkg;
4314
4315 if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
4316 int local_cpu = smp_processor_id();
4317
4318 event_pkg = topology_physical_package_id(event_cpu);
4319 local_pkg = topology_physical_package_id(local_cpu);
4320
4321 if (event_pkg == local_pkg)
4322 return local_cpu;
4323 }
4324
4325 return event_cpu;
4326}
4327
4328
4329
4330
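/*
 * Cross CPU call to read the hardware event.
 */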
4331static void __perf_event_read(void *info)
4332{
4333 struct perf_read_data *data = info;
4334 struct perf_event *sub, *event = data->event;
4335 struct perf_event_context *ctx = event->ctx;
4336 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
4337 struct pmu *pmu = event->pmu;
4338
4339
4340
4341
4342
4343
4344
4345
4346 if (ctx->task && cpuctx->task_ctx != ctx)
4347 return;
4348
4349 raw_spin_lock(&ctx->lock);
4350 if (ctx->is_active & EVENT_TIME) {
4351 update_context_time(ctx);
4352 update_cgrp_time_from_event(event);
4353 }
4354
4355 perf_event_update_time(event);
4356 if (data->group)
4357 perf_event_update_sibling_time(event);
4358
4359 if (event->state != PERF_EVENT_STATE_ACTIVE)
4360 goto unlock;
4361
4362 if (!data->group) {
4363 pmu->read(event);
4364 data->ret = 0;
4365 goto unlock;
4366 }
4367
4368 pmu->start_txn(pmu, PERF_PMU_TXN_READ);
4369
4370 pmu->read(event);
4371
4372 for_each_sibling_event(sub, event) {
4373 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
4374
4375
4376
4377
4378 sub->pmu->read(sub);
4379 }
4380 }
4381
4382 data->ret = pmu->commit_txn(pmu);
4383
4384unlock:
4385 raw_spin_unlock(&ctx->lock);
4386}
4387
4388static inline u64 perf_event_count(struct perf_event *event)
4389{
4390 return local64_read(&event->count) + atomic64_read(&event->child_count);
4391}
4392
4393
4394
4395
4396
4397
4398
4399
4400
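/*
 * Read a local event, that is an event that is either for the current
 * task or for this CPU, and that does not have inherit set (inherited
 * task events are not local and cannot be read atomically). Runs with
 * interrupts disabled and takes no locks, so it is usable from contexts
 * such as BPF helpers (e.g. bpf_perf_event_read()).
 */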
4401int perf_event_read_local(struct perf_event *event, u64 *value,
4402 u64 *enabled, u64 *running)
4403{
4404 unsigned long flags;
4405 int ret = 0;
4406
4407
4408
4409
4410
4411 local_irq_save(flags);
4412
4413
4414
4415
4416
4417 if (event->attr.inherit) {
4418 ret = -EOPNOTSUPP;
4419 goto out;
4420 }
4421
4422
4423 if ((event->attach_state & PERF_ATTACH_TASK) &&
4424 event->hw.target != current) {
4425 ret = -EINVAL;
4426 goto out;
4427 }
4428
4429
4430 if (!(event->attach_state & PERF_ATTACH_TASK) &&
4431 event->cpu != smp_processor_id()) {
4432 ret = -EINVAL;
4433 goto out;
4434 }
4435
4436
4437 if (event->attr.pinned && event->oncpu != smp_processor_id()) {
4438 ret = -EBUSY;
4439 goto out;
4440 }
4441
4442
4443
4444
4445
4446
4447 if (event->oncpu == smp_processor_id())
4448 event->pmu->read(event);
4449
4450 *value = local64_read(&event->count);
4451 if (enabled || running) {
4452 u64 now = event->shadow_ctx_time + perf_clock();
4453 u64 __enabled, __running;
4454
4455 __perf_update_times(event, now, &__enabled, &__running);
4456 if (enabled)
4457 *enabled = __enabled;
4458 if (running)
4459 *running = __running;
4460 }
4461out:
4462 local_irq_restore(flags);
4463
4464 return ret;
4465}
4466
4467static int perf_event_read(struct perf_event *event, bool group)
4468{
4469 enum perf_event_state state = READ_ONCE(event->state);
4470 int event_cpu, ret = 0;
4471
4472
4473
4474
4475
4476again:
4477 if (state == PERF_EVENT_STATE_ACTIVE) {
4478 struct perf_read_data data;
4479
4480
4481
4482
4483
4484
4485
4486 smp_rmb();
4487
4488 event_cpu = READ_ONCE(event->oncpu);
4489 if ((unsigned)event_cpu >= nr_cpu_ids)
4490 return 0;
4491
4492 data = (struct perf_read_data){
4493 .event = event,
4494 .group = group,
4495 .ret = 0,
4496 };
4497
4498 preempt_disable();
4499 event_cpu = __perf_event_read_cpu(event, event_cpu);
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511 (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
4512 preempt_enable();
4513 ret = data.ret;
4514
4515 } else if (state == PERF_EVENT_STATE_INACTIVE) {
4516 struct perf_event_context *ctx = event->ctx;
4517 unsigned long flags;
4518
4519 raw_spin_lock_irqsave(&ctx->lock, flags);
4520 state = event->state;
4521 if (state != PERF_EVENT_STATE_INACTIVE) {
4522 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4523 goto again;
4524 }
4525
4526
4527
4528
4529
4530 if (ctx->is_active & EVENT_TIME) {
4531 update_context_time(ctx);
4532 update_cgrp_time_from_event(event);
4533 }
4534
4535 perf_event_update_time(event);
4536 if (group)
4537 perf_event_update_sibling_time(event);
4538 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4539 }
4540
4541 return ret;
4542}
4543
4544
4545
4546
4547static void __perf_event_init_context(struct perf_event_context *ctx)
4548{
4549 raw_spin_lock_init(&ctx->lock);
4550 mutex_init(&ctx->mutex);
4551 INIT_LIST_HEAD(&ctx->active_ctx_list);
4552 perf_event_groups_init(&ctx->pinned_groups);
4553 perf_event_groups_init(&ctx->flexible_groups);
4554 INIT_LIST_HEAD(&ctx->event_list);
4555 INIT_LIST_HEAD(&ctx->pinned_active);
4556 INIT_LIST_HEAD(&ctx->flexible_active);
4557 refcount_set(&ctx->refcount, 1);
4558}
4559
4560static struct perf_event_context *
4561alloc_perf_context(struct pmu *pmu, struct task_struct *task)
4562{
4563 struct perf_event_context *ctx;
4564
4565 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4566 if (!ctx)
4567 return NULL;
4568
4569 __perf_event_init_context(ctx);
4570 if (task)
4571 ctx->task = get_task_struct(task);
4572 ctx->pmu = pmu;
4573
4574 return ctx;
4575}
4576
4577static struct task_struct *
4578find_lively_task_by_vpid(pid_t vpid)
4579{
4580 struct task_struct *task;
4581
4582 rcu_read_lock();
4583 if (!vpid)
4584 task = current;
4585 else
4586 task = find_task_by_vpid(vpid);
4587 if (task)
4588 get_task_struct(task);
4589 rcu_read_unlock();
4590
4591 if (!task)
4592 return ERR_PTR(-ESRCH);
4593
4594 return task;
4595}
4596
4597
4598
4599
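/*
 * Returns a matching context with refcount and pincount.
 */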
4600static struct perf_event_context *
4601find_get_context(struct pmu *pmu, struct task_struct *task,
4602 struct perf_event *event)
4603{
4604 struct perf_event_context *ctx, *clone_ctx = NULL;
4605 struct perf_cpu_context *cpuctx;
4606 void *task_ctx_data = NULL;
4607 unsigned long flags;
4608 int ctxn, err;
4609 int cpu = event->cpu;
4610
4611 if (!task) {
4612
4613 err = perf_allow_cpu(&event->attr);
4614 if (err)
4615 return ERR_PTR(err);
4616
4617 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
4618 ctx = &cpuctx->ctx;
4619 get_ctx(ctx);
4620 raw_spin_lock_irqsave(&ctx->lock, flags);
4621 ++ctx->pin_count;
4622 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4623
4624 return ctx;
4625 }
4626
4627 err = -EINVAL;
4628 ctxn = pmu->task_ctx_nr;
4629 if (ctxn < 0)
4630 goto errout;
4631
4632 if (event->attach_state & PERF_ATTACH_TASK_DATA) {
4633 task_ctx_data = alloc_task_ctx_data(pmu);
4634 if (!task_ctx_data) {
4635 err = -ENOMEM;
4636 goto errout;
4637 }
4638 }
4639
4640retry:
4641 ctx = perf_lock_task_context(task, ctxn, &flags);
4642 if (ctx) {
4643 clone_ctx = unclone_ctx(ctx);
4644 ++ctx->pin_count;
4645
4646 if (task_ctx_data && !ctx->task_ctx_data) {
4647 ctx->task_ctx_data = task_ctx_data;
4648 task_ctx_data = NULL;
4649 }
4650 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4651
4652 if (clone_ctx)
4653 put_ctx(clone_ctx);
4654 } else {
4655 ctx = alloc_perf_context(pmu, task);
4656 err = -ENOMEM;
4657 if (!ctx)
4658 goto errout;
4659
4660 if (task_ctx_data) {
4661 ctx->task_ctx_data = task_ctx_data;
4662 task_ctx_data = NULL;
4663 }
4664
4665 err = 0;
4666 mutex_lock(&task->perf_event_mutex);
4667
4668
4669
4670
4671 if (task->flags & PF_EXITING)
4672 err = -ESRCH;
4673 else if (task->perf_event_ctxp[ctxn])
4674 err = -EAGAIN;
4675 else {
4676 get_ctx(ctx);
4677 ++ctx->pin_count;
4678 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
4679 }
4680 mutex_unlock(&task->perf_event_mutex);
4681
4682 if (unlikely(err)) {
4683 put_ctx(ctx);
4684
4685 if (err == -EAGAIN)
4686 goto retry;
4687 goto errout;
4688 }
4689 }
4690
4691 free_task_ctx_data(pmu, task_ctx_data);
4692 return ctx;
4693
4694errout:
4695 free_task_ctx_data(pmu, task_ctx_data);
4696 return ERR_PTR(err);
4697}
4698
4699static void perf_event_free_filter(struct perf_event *event);
4700static void perf_event_free_bpf_prog(struct perf_event *event);
4701
4702static void free_event_rcu(struct rcu_head *head)
4703{
4704 struct perf_event *event;
4705
4706 event = container_of(head, struct perf_event, rcu_head);
4707 if (event->ns)
4708 put_pid_ns(event->ns);
4709 perf_event_free_filter(event);
4710 kmem_cache_free(perf_event_cache, event);
4711}
4712
4713static void ring_buffer_attach(struct perf_event *event,
4714 struct perf_buffer *rb);
4715
4716static void detach_sb_event(struct perf_event *event)
4717{
4718 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
4719
4720 raw_spin_lock(&pel->lock);
4721 list_del_rcu(&event->sb_list);
4722 raw_spin_unlock(&pel->lock);
4723}
4724
4725static bool is_sb_event(struct perf_event *event)
4726{
4727 struct perf_event_attr *attr = &event->attr;
4728
4729 if (event->parent)
4730 return false;
4731
4732 if (event->attach_state & PERF_ATTACH_TASK)
4733 return false;
4734
4735 if (attr->mmap || attr->mmap_data || attr->mmap2 ||
4736 attr->comm || attr->comm_exec ||
4737 attr->task || attr->ksymbol ||
4738 attr->context_switch || attr->text_poke ||
4739 attr->bpf_event)
4740 return true;
4741 return false;
4742}
4743
4744static void unaccount_pmu_sb_event(struct perf_event *event)
4745{
4746 if (is_sb_event(event))
4747 detach_sb_event(event);
4748}
4749
4750static void unaccount_event_cpu(struct perf_event *event, int cpu)
4751{
4752 if (event->parent)
4753 return;
4754
4755 if (is_cgroup_event(event))
4756 atomic_dec(&per_cpu(perf_cgroup_events, cpu));
4757}
4758
4759#ifdef CONFIG_NO_HZ_FULL
4760static DEFINE_SPINLOCK(nr_freq_lock);
4761#endif
4762
4763static void unaccount_freq_event_nohz(void)
4764{
4765#ifdef CONFIG_NO_HZ_FULL
4766 spin_lock(&nr_freq_lock);
4767 if (atomic_dec_and_test(&nr_freq_events))
4768 tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
4769 spin_unlock(&nr_freq_lock);
4770#endif
4771}
4772
4773static void unaccount_freq_event(void)
4774{
4775 if (tick_nohz_full_enabled())
4776 unaccount_freq_event_nohz();
4777 else
4778 atomic_dec(&nr_freq_events);
4779}
4780
4781static void unaccount_event(struct perf_event *event)
4782{
4783 bool dec = false;
4784
4785 if (event->parent)
4786 return;
4787
4788 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
4789 dec = true;
4790 if (event->attr.mmap || event->attr.mmap_data)
4791 atomic_dec(&nr_mmap_events);
4792 if (event->attr.build_id)
4793 atomic_dec(&nr_build_id_events);
4794 if (event->attr.comm)
4795 atomic_dec(&nr_comm_events);
4796 if (event->attr.namespaces)
4797 atomic_dec(&nr_namespaces_events);
4798 if (event->attr.cgroup)
4799 atomic_dec(&nr_cgroup_events);
4800 if (event->attr.task)
4801 atomic_dec(&nr_task_events);
4802 if (event->attr.freq)
4803 unaccount_freq_event();
4804 if (event->attr.context_switch) {
4805 dec = true;
4806 atomic_dec(&nr_switch_events);
4807 }
4808 if (is_cgroup_event(event))
4809 dec = true;
4810 if (has_branch_stack(event))
4811 dec = true;
4812 if (event->attr.ksymbol)
4813 atomic_dec(&nr_ksymbol_events);
4814 if (event->attr.bpf_event)
4815 atomic_dec(&nr_bpf_events);
4816 if (event->attr.text_poke)
4817 atomic_dec(&nr_text_poke_events);
4818
4819 if (dec) {
4820 if (!atomic_add_unless(&perf_sched_count, -1, 1))
4821 schedule_delayed_work(&perf_sched_work, HZ);
4822 }
4823
4824 unaccount_event_cpu(event, event->cpu);
4825
4826 unaccount_pmu_sb_event(event);
4827}
4828
4829static void perf_sched_delayed(struct work_struct *work)
4830{
4831 mutex_lock(&perf_sched_mutex);
4832 if (atomic_dec_and_test(&perf_sched_count))
4833 static_branch_disable(&perf_sched_events);
4834 mutex_unlock(&perf_sched_mutex);
4835}
4836
/*
 * The following implement mutual exclusion of events on "exclusive" pmus
 * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
 * at a time, so we disallow creating events that might conflict, namely:
 *
 *  1) cpu-wide events in the presence of per-task events,
 *  2) per-task events in the presence of cpu-wide events,
 *  3) two matching events on the same context.
 *
 * The former two cases are handled in the allocation path (perf_event_alloc(),
 * _free_event()), the latter -- before the first perf_install_in_context().
 */
4849static int exclusive_event_init(struct perf_event *event)
4850{
4851 struct pmu *pmu = event->pmu;
4852
4853 if (!is_exclusive_pmu(pmu))
4854 return 0;
4855
 /*
  * Prevent co-existence of per-task and cpu-wide events on the
  * same exclusive pmu.
  *
  * Negative pmu::exclusive_cnt means there are cpu-wide
  * events on this "exclusive" pmu, positive means there are
  * per-task events.
  *
  * Since this is called in the perf_event_alloc() path, event::ctx
  * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
  * to mean "per-task event", because unlike other attach states it
  * never gets set in a detach path.
  */
4869 if (event->attach_state & PERF_ATTACH_TASK) {
4870 if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
4871 return -EBUSY;
4872 } else {
4873 if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
4874 return -EBUSY;
4875 }
4876
4877 return 0;
4878}
4879
4880static void exclusive_event_destroy(struct perf_event *event)
4881{
4882 struct pmu *pmu = event->pmu;
4883
4884 if (!is_exclusive_pmu(pmu))
4885 return;
4886
4887
4888 if (event->attach_state & PERF_ATTACH_TASK)
4889 atomic_dec(&pmu->exclusive_cnt);
4890 else
4891 atomic_inc(&pmu->