1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#define pr_fmt(fmt) "seccomp: " fmt
17
18#include <linux/refcount.h>
19#include <linux/audit.h>
20#include <linux/compat.h>
21#include <linux/coredump.h>
22#include <linux/kmemleak.h>
23#include <linux/nospec.h>
24#include <linux/prctl.h>
25#include <linux/sched.h>
26#include <linux/sched/task_stack.h>
27#include <linux/seccomp.h>
28#include <linux/slab.h>
29#include <linux/syscalls.h>
30#include <linux/sysctl.h>
31
32
33#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)
34
35#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
36#include <asm/syscall.h>
37#endif
38
39#ifdef CONFIG_SECCOMP_FILTER
40#include <linux/file.h>
41#include <linux/filter.h>
42#include <linux/pid.h>
43#include <linux/ptrace.h>
44#include <linux/capability.h>
45#include <linux/uaccess.h>
46#include <linux/anon_inodes.h>
47#include <linux/lockdep.h>
48
49
50
51
52
53
54
55#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64)
56
/*
 * Lifecycle of a user notification: created by the target task (INIT),
 * handed to the supervisor (SENT), then answered by the supervisor
 * (REPLIED), at which point ->error/->val/->flags become valid.
 */
enum notify_state {
	SECCOMP_NOTIFY_INIT,
	SECCOMP_NOTIFY_SENT,
	SECCOMP_NOTIFY_REPLIED,
};
62
/**
 * struct seccomp_knotif - kernel-side bookkeeping for one user notification
 * @task: the task which triggered the notification
 * @id: cookie identifying this notification to user space
 * @data: the seccomp_data of the syscall that triggered the notification
 * @state: where this notification is in its lifecycle (see notify_state)
 * @error: errno to hand back to the target task (valid once REPLIED)
 * @val: return value to hand back to the target task (valid once REPLIED)
 * @flags: SECCOMP_USER_NOTIF_FLAG_* bits from the supervisor's reply
 * @ready: completed when the supervisor has replied (or on listener detach)
 * @list: membership in the filter's notif->notifications list
 * @addfd: list of pending struct seccomp_kaddfd requests for this notification
 */
struct seccomp_knotif {
	/* The task which triggered this notification. */
	struct task_struct *task;

	/* The "cookie" for this request; this should be unique. */
	u64 id;

	/*
	 * The seccomp data. This pointer is valid the entire time this
	 * notification is active, since it comes from the caller's stack in
	 * seccomp_do_user_notification().
	 */
	const struct seccomp_data *data;

	/* Lifecycle state of this notification. */
	enum notify_state state;

	/* The return values, only valid when in SECCOMP_NOTIFY_REPLIED. */
	int error;
	long val;
	u32 flags;

	/*
	 * Signals when this has changed states, such as the listener
	 * dying, a new seccomp addfd message, or changing to REPLIED.
	 */
	struct completion ready;

	struct list_head list;

	/* Outstanding addfd requests. */
	struct list_head addfd;
};
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/**
 * struct seccomp_kaddfd - kernel-side bookkeeping for an ADDFD request
 * @file: the file to install into the target task's fd table
 * @fd: the fd number to replace when @setfd is true (from addfd.newfd)
 * @flags: fd installation flags (only O_CLOEXEC is accepted from userspace)
 * @ioctl_flags: the SECCOMP_ADDFD_FLAG_* bits from the ioctl request
 * @setfd: (request side of union) install at the specific fd number @fd
 *         rather than at the lowest free slot
 * @ret: (reply side of union) the installed fd number, or a negative errno
 * @completion: signaled once the request has been handled by the target
 * @list: membership in a seccomp_knotif's addfd list
 */
struct seccomp_kaddfd {
	struct file *file;
	int fd;
	unsigned int flags;
	__u32 ioctl_flags;

	union {
		bool setfd;
		/* To only be set on reply. */
		int ret;
	};
	struct completion completion;
	struct list_head list;
};
134
135
136
137
138
139
140
141
142
143
144
145
146
/**
 * struct notification - container for seccomp userspace notifications
 * @request: A semaphore that users of this notification can wait on for
 *           changes. Actual reads and writes are still controlled with
 *           filter->notify_lock.
 * @next_id: The id of the next request.
 * @notifications: A list of struct seccomp_knotif elements.
 *
 * Allocated only while a listener is attached (filter->notif != NULL).
 */
struct notification {
	struct semaphore request;
	u64 next_id;
	struct list_head notifications;
};
152
#ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter cache of "always allowed" syscalls
 * @allow_native: bitmap where a set bit means the filter chain always
 *		  returns SECCOMP_RET_ALLOW for that syscall number on
 *		  the native architecture
 * @allow_compat: same, for the compat architecture (if any)
 */
struct action_cache {
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
#else
/* No bitmap cache without SECCOMP_ARCH_NATIVE: empty struct and stubs. */
struct action_cache { };

static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}

static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif /* SECCOMP_ARCH_NATIVE */
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
/**
 * struct seccomp_filter - container for seccomp BPF programs
 * @refs: memory reference count; a filter is freed when this drops to zero
 * @users: count of tasks (and notify listeners) that can still trigger this
 *         filter; tracked separately from @refs so listeners can observe
 *         all users going away (EPOLLHUP) before the memory is released
 * @log: set via SECCOMP_FILTER_FLAG_LOG; makes non-fatal actions eligible
 *       for logging in seccomp_log()
 * @wait_killable_recv: set via SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV;
 *       switch the notifying task to a killable wait once its notification
 *       has been received by the listener
 * @cache: per-arch bitmaps of syscalls that are always allowed (fast path)
 * @prev: points to a previously installed, or parent, filter
 * @prog: the BPF program to evaluate
 * @notif: user-notification state; non-NULL while a listener is attached
 * @notify_lock: serializes all changes to user-notification state
 * @wqh: wait queue used for the listener's poll() and wakeups
 *
 * Filters form a tree linked via @prev; from any task's point of view it
 * is a singly-linked list starting at current->seccomp.filter (the most
 * recently attached filter).
 */
struct seccomp_filter {
	refcount_t refs;
	refcount_t users;
	bool log;
	bool wait_killable_recv;
	struct action_cache cache;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
	struct notification *notif;
	struct mutex notify_lock;
	wait_queue_head_t wqh;
};
233
234
235#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
236
237
238
239
240
241static void populate_seccomp_data(struct seccomp_data *sd)
242{
243
244
245
246
247 struct task_struct *task = current;
248 struct pt_regs *regs = task_pt_regs(task);
249 unsigned long args[6];
250
251 sd->nr = syscall_get_nr(task, regs);
252 sd->arch = syscall_get_arch(task);
253 syscall_get_arguments(task, regs, args);
254 sd->args[0] = args[0];
255 sd->args[1] = args[1];
256 sd->args[2] = args[2];
257 sd->args[3] = args[3];
258 sd->args[4] = args[4];
259 sd->args[5] = args[5];
260 sd->instruction_pointer = KSTK_EIP(task);
261}
262
263
264
265
266
267
268
269
270
271
272
273
274
/**
 *	seccomp_check_filter - verify seccomp filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Takes a previously checked filter (by bpf_check_classic) and
 * redirects all filter code that loads struct sk_buff data
 * and related data through seccomp_bpf_load.  It also
 * enforces length and alignment checking of those loads.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and not out of bounds. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
344
345#ifdef SECCOMP_ARCH_NATIVE
346static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
347 size_t bitmap_size,
348 int syscall_nr)
349{
350 if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
351 return false;
352 syscall_nr = array_index_nospec(syscall_nr, bitmap_size);
353
354 return test_bit(syscall_nr, bitmap);
355}
356
357
358
359
360
361
362
363
/**
 * seccomp_cache_check_allow - lookup seccomp cache
 * @sfilter: The seccomp filter
 * @sd: The seccomp data to lookup the cache with
 *
 * Returns true if the seccomp_data is cached and allowed.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	int syscall_nr = sd->nr;
	const struct action_cache *cache = &sfilter->cache;

#ifndef SECCOMP_ARCH_COMPAT
	/* A native-only architecture doesn't need to check sd->arch. */
	return seccomp_cache_check_allow_bitmap(cache->allow_native,
						SECCOMP_ARCH_NATIVE_NR,
						syscall_nr);
#else
	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
		return seccomp_cache_check_allow_bitmap(cache->allow_native,
						       SECCOMP_ARCH_NATIVE_NR,
						       syscall_nr);
	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
						       SECCOMP_ARCH_COMPAT_NR,
						       syscall_nr);
#endif /* SECCOMP_ARCH_COMPAT */

	/* Any other arch value is unexpected here. */
	WARN_ON_ONCE(true);
	return false;
}
389#endif
390
391
392
393
394
395
396
397
398
399
400#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: seccomp data to be evaluated by the filters
 * @match: stores the struct seccomp_filter that resulted in the return
 *         value, unless filter returned SECCOMP_RET_ALLOW, in which case
 *         it will be unchanged.
 *
 * Returns valid seccomp BPF response codes.
 */
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{
	u32 ret = SECCOMP_RET_ALLOW;
	/* Make sure cross-thread synced filter points somewhere sane. */
	struct seccomp_filter *f =
			READ_ONCE(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (WARN_ON(f == NULL))
		return SECCOMP_RET_KILL_PROCESS;

	if (seccomp_cache_check_allow(f, sd))
		return SECCOMP_RET_ALLOW;

	/*
	 * All filters in the list are evaluated and the lowest BPF return
	 * value (ignoring the DATA bits) always takes priority.
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
			ret = cur_ret;
			*match = f;
		}
	}
	return ret;
}
430#endif
431
432static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
433{
434 assert_spin_locked(¤t->sighand->siglock);
435
436 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
437 return false;
438
439 return true;
440}
441
/* Weak default; architectures may override to apply speculation mitigations. */
void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
443
/*
 * Set @task's seccomp mode and arm the SECCOMP syscall-entry work bit.
 * Caller holds the sighand->siglock.
 */
static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure SYSCALL_WORK_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	/* Assume default seccomp processes want spec flaw mitigation. */
	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
		arch_seccomp_spec_mitigate(task);
	set_task_syscall_work(task, SECCOMP);
}
461
462#ifdef CONFIG_SECCOMP_FILTER
463
464static int is_ancestor(struct seccomp_filter *parent,
465 struct seccomp_filter *child)
466{
467
468 if (parent == NULL)
469 return 1;
470 for (; child; child = child->prev)
471 if (child == parent)
472 return 1;
473 return 0;
474}
475
476
477
478
479
480
481
482
483
484
/**
 * seccomp_can_sync_threads: checks if all threads can be synchronized
 *
 * Expects sighand and cred_guard_mutex locks to be held.
 *
 * Returns 0 on success, -ve on error, or the pid of a thread which was
 * either not in the correct seccomp mode or did not have an ancestral
 * seccomp filter.
 */
static inline pid_t seccomp_can_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Validate all threads being eligible for synchronization. */
	caller = current;
	for_each_thread(caller, thread) {
		pid_t failed;

		/* Skip current, since it is initiating the sync. */
		if (thread == caller)
			continue;

		/*
		 * A thread is eligible if it has never used seccomp, or
		 * its filter is an ancestor of (or equal to) the caller's.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
		     is_ancestor(thread->seccomp.filter,
				 caller->seccomp.filter)))
			continue;

		/* Return the first thread that cannot be synchronized. */
		failed = task_pid_vnr(thread);
		/* If the pid cannot be resolved, then return -ESRCH. */
		if (WARN_ON(failed == 0))
			failed = -ESRCH;
		return failed;
	}

	return 0;
}
517
518static inline void seccomp_filter_free(struct seccomp_filter *filter)
519{
520 if (filter) {
521 bpf_prog_destroy(filter->prog);
522 kfree(filter);
523 }
524}
525
/*
 * Drop one "users" reference on each filter in the chain.  When the last
 * user of a filter goes away, wake any pollers on its wait queue with
 * EPOLLHUP so a notify listener sees that no task can trigger it anymore.
 */
static void __seccomp_filter_orphan(struct seccomp_filter *orig)
{
	while (orig && refcount_dec_and_test(&orig->users)) {
		if (waitqueue_active(&orig->wqh))
			wake_up_poll(&orig->wqh, EPOLLHUP);
		orig = orig->prev;
	}
}
534
/*
 * Drop one memory reference on each filter in the chain, freeing the
 * single-reference branches iteratively (no recursion on ->prev).
 */
static void __put_seccomp_filter(struct seccomp_filter *orig)
{
	/* Clean up single-reference branches iteratively. */
	while (orig && refcount_dec_and_test(&orig->refs)) {
		struct seccomp_filter *freeme = orig;
		orig = orig->prev;
		seccomp_filter_free(freeme);
	}
}
544
static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Notify about any unused filters in the task's former filter tree. */
	__seccomp_filter_orphan(orig);
	/* Finally drop all references to the task's former tree. */
	__put_seccomp_filter(orig);
}
552
553
554
555
556
557
558
559
560
561
/**
 * seccomp_filter_release - Detach the task from its filter tree,
 *			    drop its reference count, and notify
 *			    about unused filters
 *
 * @tsk: task the filter should be released from.
 *
 * This function should only be called when the task is exiting as
 * it detaches it from its filter tree. As such, READ_ONCE() and
 * barriers are not needed here, as would normally be needed.
 */
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	/* We are effectively holding the siglock by not having any sighand. */
	WARN_ON(tsk->sighand != NULL);

	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	__seccomp_filter_release(orig);
}
573
574
575
576
577
578
579
580
581
/**
 * seccomp_sync_threads: sets all threads to use current's filter
 * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync
 *
 * Expects sighand and cred_guard_mutex locks to be held, and for
 * seccomp_can_sync_threads() to have returned success already
 * without dropping the locks.
 */
static inline void seccomp_sync_threads(unsigned long flags)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Synchronize all threads. */
	caller = current;
	for_each_thread(caller, thread) {
		/* Skip current, since it needs no changes. */
		if (thread == caller)
			continue;

		/* Get a task reference for the new leaf node. */
		get_seccomp_filter(caller);

		/*
		 * Drop the task reference to the shared ancestor since
		 * current's path will hold a reference.  (This also
		 * allows a put before the assignment.)
		 */
		__seccomp_filter_release(thread->seccomp.filter);

		/* Make our new filter tree visible. */
		smp_store_release(&thread->seccomp.filter,
				  caller->seccomp.filter);
		atomic_set(&thread->seccomp.filter_count,
			   atomic_read(&caller->seccomp.filter_count));

		/*
		 * Don't let an unprivileged task work around
		 * the no_new_privs restriction by creating
		 * a thread that sets it up, enters seccomp,
		 * then dies.
		 */
		if (task_no_new_privs(caller))
			task_set_no_new_privs(thread);

		/*
		 * Opt the other thread into seccomp if needed.
		 * As threads are considered to be trust-realm
		 * equivalent, allow changes without re-checking
		 * permissions, and without the need for NNP testing.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
					    flags);
	}
}
632
633
634
635
636
637
638
/**
 * seccomp_prepare_filter: Prepares a seccomp filter for use.
 * @fprog: BPF program to install
 *
 * Returns filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
	struct seccomp_filter *sfilter;
	int ret;
	/* The original cBPF is kept when checkpoint/restore or the
	 * bitmap cache emulation (seccomp_is_const_allow) needs it. */
	const bool save_orig =
#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
		true;
#else
		false;
#endif

	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
		return ERR_PTR(-EINVAL);

	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));

	/*
	 * Installing a seccomp filter requires that the task has
	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
	 * This avoids scenarios where unprivileged tasks can affect the
	 * behavior of privileged children.
	 */
	if (!task_no_new_privs(current) &&
			!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return ERR_PTR(-EACCES);

	/* Allocate a new seccomp_filter. */
	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
	if (!sfilter)
		return ERR_PTR(-ENOMEM);

	mutex_init(&sfilter->notify_lock);
	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
					seccomp_check_filter, save_orig);
	if (ret < 0) {
		kfree(sfilter);
		return ERR_PTR(ret);
	}

	refcount_set(&sfilter->refs, 1);
	refcount_set(&sfilter->users, 1);
	init_waitqueue_head(&sfilter->wqh);

	return sfilter;
}
684
685
686
687
688
689
690
/**
 * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 * @user_filter: pointer to the user data containing a sock_fprog.
 *
 * Returns a filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
	struct sock_fprog fprog;
	struct seccomp_filter *filter = ERR_PTR(-EFAULT);

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		/* Compat callers hand in a 32-bit sock_fprog layout. */
		struct compat_sock_fprog fprog32;
		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
			goto out;
		fprog.len = fprog32.len;
		fprog.filter = compat_ptr(fprog32.filter);
	} else /* falls through to the if below. */
#endif
	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
		goto out;
	filter = seccomp_prepare_filter(&fprog);
out:
	return filter;
}
712
713#ifdef SECCOMP_ARCH_NATIVE
714
715
716
717
718
719
/*
 * Partially emulate the cBPF program with only sd->nr and sd->arch as
 * inputs, to decide statically whether it always returns
 * SECCOMP_RET_ALLOW for this (nr, arch) pair.  Any instruction whose
 * result would depend on other data makes the outcome non-constant and
 * we conservatively return false ("cannot prove always-allowed").
 */
static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
				   struct seccomp_data *sd)
{
	unsigned int reg_value = 0;
	unsigned int pc;
	bool op_res;

	if (WARN_ON_ONCE(!fprog))
		return false;

	for (pc = 0; pc < fprog->len; pc++) {
		struct sock_filter *insn = &fprog->filter[pc];
		u16 code = insn->code;
		u32 k = insn->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			switch (k) {
			case offsetof(struct seccomp_data, nr):
				reg_value = sd->nr;
				break;
			case offsetof(struct seccomp_data, arch):
				reg_value = sd->arch;
				break;
			default:
				/* Can't optimize (non-constant load). */
				return false;
			}
			break;
		case BPF_RET | BPF_K:
			/* Reached a constant return: decide. */
			return k == SECCOMP_RET_ALLOW;
		case BPF_JMP | BPF_JA:
			pc += insn->k;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_K:
			switch (BPF_OP(code)) {
			case BPF_JEQ:
				op_res = reg_value == k;
				break;
			case BPF_JGE:
				op_res = reg_value >= k;
				break;
			case BPF_JGT:
				op_res = reg_value > k;
				break;
			case BPF_JSET:
				op_res = !!(reg_value & k);
				break;
			default:
				/* Can't optimize (unknown jump). */
				return false;
			}

			pc += op_res ? insn->jt : insn->jf;
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			reg_value &= k;
			break;
		default:
			/* Can't optimize (unknown insn). */
			return false;
		}
	}

	/* Unreachable: instructions must be terminated by a return insn. */
	WARN_ON(1);
	return false;
}
792
/*
 * Compute @bitmap of always-allowed syscalls for @arch by emulating the
 * filter against every syscall number, starting from the previous
 * filter's bitmap (a new filter can only be more restrictive).
 */
static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
					 void *bitmap, const void *bitmap_prev,
					 size_t bitmap_size, int arch)
{
	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
	struct seccomp_data sd;
	int nr;

	if (bitmap_prev) {
		/* The new filter must be as restrictive as the last. */
		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
	} else {
		/* Before any filters, all syscalls are always allowed. */
		bitmap_fill(bitmap, bitmap_size);
	}

	for (nr = 0; nr < bitmap_size; nr++) {
		/* No bitmap change: not a cacheable action. */
		if (!test_bit(nr, bitmap))
			continue;

		sd.nr = nr;
		sd.arch = arch;

		/* No bitmap change: continue to always allow. */
		if (seccomp_is_const_allow(fprog, &sd))
			continue;

		/*
		 * Not a cacheable action: always run filters.
		 * atomic clear_bit() not needed, filter not visible yet.
		 */
		__clear_bit(nr, bitmap);
	}
}
828
829
830
831
832
833
834
/**
 * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
 * @sfilter: The seccomp filter
 *
 * Fills the per-arch allow bitmaps; must run before @sfilter becomes
 * visible to other threads (the bitmap writes are non-atomic).
 */
static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
	struct action_cache *cache = &sfilter->cache;
	const struct action_cache *cache_prev =
		sfilter->prev ? &sfilter->prev->cache : NULL;

	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
				     cache_prev ? cache_prev->allow_native : NULL,
				     SECCOMP_ARCH_NATIVE_NR,
				     SECCOMP_ARCH_NATIVE);

#ifdef SECCOMP_ARCH_COMPAT
	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
				     cache_prev ? cache_prev->allow_compat : NULL,
				     SECCOMP_ARCH_COMPAT_NR,
				     SECCOMP_ARCH_COMPAT);
#endif /* SECCOMP_ARCH_COMPAT */
}
853#endif
854
855
856
857
858
859
860
861
862
863
864
865
866
/**
 * seccomp_attach_filter: validate and attach filter
 * @flags:  flags to change filter behavior
 * @filter: seccomp filter to add to the current process
 *
 * Caller must be holding current->sighand->siglock lock.
 *
 * Returns 0 on success, -ENOMEM if the combined filter length limit is
 * exceeded, or (in the TSYNC case) the pid of an unsynchronizable thread
 * — mapped to -ESRCH when SECCOMP_FILTER_FLAG_TSYNC_ESRCH is set.
 */
static long seccomp_attach_filter(unsigned int flags,
				  struct seccomp_filter *filter)
{
	unsigned long total_insns;
	struct seccomp_filter *walker;

	assert_spin_locked(&current->sighand->siglock);

	/* Validate resulting filter length. */
	total_insns = filter->prog->len;
	for (walker = current->seccomp.filter; walker; walker = walker->prev)
		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
	if (total_insns > MAX_INSNS_PER_PATH)
		return -ENOMEM;

	/* If thread sync has been requested, check that it is possible. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
		int ret;

		ret = seccomp_can_sync_threads();
		if (ret) {
			if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
				return -ESRCH;
			else
				return ret;
		}
	}

	/* Set log flag, if present. */
	if (flags & SECCOMP_FILTER_FLAG_LOG)
		filter->log = true;

	/* Set wait killable flag, if present. */
	if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
		filter->wait_killable_recv = true;

	/*
	 * If there is an existing filter, make it the prev and don't drop its
	 * task reference.
	 */
	filter->prev = current->seccomp.filter;
	seccomp_cache_prepare(filter);
	current->seccomp.filter = filter;
	atomic_inc(&current->seccomp.filter_count);

	/* Now that the new filter is in place, synchronize to all threads. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		seccomp_sync_threads(flags);

	return 0;
}
918
/* Take an additional memory reference (->refs) on @filter. */
static void __get_seccomp_filter(struct seccomp_filter *filter)
{
	refcount_inc(&filter->refs);
}
923
924
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;
	if (!orig)
		return;
	/* Bump both the memory reference and the "users" reference. */
	__get_seccomp_filter(orig);
	refcount_inc(&orig->users);
}
933
934#endif
935
936
937#define SECCOMP_LOG_KILL_PROCESS (1 << 0)
938#define SECCOMP_LOG_KILL_THREAD (1 << 1)
939#define SECCOMP_LOG_TRAP (1 << 2)
940#define SECCOMP_LOG_ERRNO (1 << 3)
941#define SECCOMP_LOG_TRACE (1 << 4)
942#define SECCOMP_LOG_LOG (1 << 5)
943#define SECCOMP_LOG_ALLOW (1 << 6)
944#define SECCOMP_LOG_USER_NOTIF (1 << 7)
945
/*
 * Bitmask of actions that seccomp_log() will report via audit_seccomp().
 * By default everything except SECCOMP_LOG_ALLOW is enabled.
 * NOTE(review): presumably writable via a sysctl (linux/sysctl.h is
 * included), but the handler is outside this chunk — confirm.
 */
static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
				    SECCOMP_LOG_KILL_THREAD  |
				    SECCOMP_LOG_TRAP  |
				    SECCOMP_LOG_ERRNO |
				    SECCOMP_LOG_USER_NOTIF |
				    SECCOMP_LOG_TRACE |
				    SECCOMP_LOG_LOG;
953
/*
 * Decide whether this (syscall, action) should be reported to audit.
 * @requested reflects the filter's SECCOMP_FILTER_FLAG_LOG bit; it only
 * gates the non-fatal actions (TRAP/ERRNO/TRACE/USER_NOTIF).
 */
static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
			       bool requested)
{
	bool log = false;

	switch (action) {
	case SECCOMP_RET_ALLOW:
		/* ALLOW is never logged. */
		break;
	case SECCOMP_RET_TRAP:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
		break;
	case SECCOMP_RET_ERRNO:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
		break;
	case SECCOMP_RET_TRACE:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
		break;
	case SECCOMP_RET_USER_NOTIF:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
		break;
	case SECCOMP_RET_LOG:
		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
		break;
	case SECCOMP_RET_KILL_THREAD:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
		break;
	case SECCOMP_RET_KILL_PROCESS:
	default:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
	}

	/*
	 * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
	 * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
	 * any action except RET_ALLOW via seccomp_actions_logged.
	 */
	if (!log)
		return;

	audit_seccomp(syscall, signr, action);
}
996
997
998
999
1000
1001
/*
 * Secure computing mode 1 allows only read, write, exit, and sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	-1, /* negative terminated */
};
1006
/*
 * Enforce strict (mode 1) seccomp: kill the task with SIGKILL if it
 * attempts any syscall outside the mode-1 allow list.
 */
static void __secure_computing_strict(int this_syscall)
{
	const int *allowed_syscalls = mode1_syscalls;
#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		allowed_syscalls = get_compat_mode1_syscalls();
#endif
	do {
		if (*allowed_syscalls == this_syscall)
			return;
	} while (*++allowed_syscalls != -1);

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	/* Mark dead: a surviving task making another syscall is a bug. */
	current->seccomp.mode = SECCOMP_MODE_DEAD;
	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
	do_exit(SIGKILL);
}
1026
1027#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	/* Checkpoint/restore may legitimately suspend seccomp via ptrace. */
	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
1043#else
1044
1045#ifdef CONFIG_SECCOMP_FILTER
static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
{
	/*
	 * Note: overflow is ok here, the id just needs to be unique per
	 * filter.
	 */
	lockdep_assert_held(&filter->notify_lock);
	return filter->notif->next_id++;
}
1055
/*
 * Install the requested fd into the current (target) task and complete
 * the addfd request.  Runs in the target task's context, with
 * match->notify_lock held.
 */
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{
	int fd;

	/*
	 * Remove the notification, and reset the list pointers, indicating
	 * that it has been handled.
	 */
	list_del_init(&addfd->list);
	if (!addfd->setfd)
		fd = receive_fd(addfd->file, addfd->flags);
	else
		fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
	addfd->ret = fd;

	if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
		/* If we fail reset and return an error to the notifier. */
		if (fd < 0) {
			n->state = SECCOMP_NOTIFY_SENT;
		} else {
			/* Return the FD we just added as the syscall result. */
			n->flags = 0;
			n->error = 0;
			n->val = fd;
		}
	}

	/*
	 * Mark the notification as completed. From this point, addfd mem
	 * might be invalidated and we can't safely read it anymore.
	 */
	complete(&addfd->completion);
}
1089
1090static bool should_sleep_killable(struct seccomp_filter *match,
1091 struct seccomp_knotif *n)
1092{
1093 return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT;
1094}
1095
/*
 * Queue a notification for the supervisor and block until it replies
 * (or we are interrupted/killed).  Returns 0 to continue the syscall
 * (SECCOMP_USER_NOTIF_FLAG_CONTINUE), or -1 to skip it with the
 * supervisor-provided return value installed.
 */
static int seccomp_do_user_notification(int this_syscall,
					struct seccomp_filter *match,
					const struct seccomp_data *sd)
{
	int err;
	u32 flags = 0;
	long ret = 0;
	struct seccomp_knotif n = {};
	struct seccomp_kaddfd *addfd, *tmp;

	mutex_lock(&match->notify_lock);
	err = -ENOSYS;
	if (!match->notif)
		goto out;

	n.task = current;
	n.state = SECCOMP_NOTIFY_INIT;
	n.data = sd;
	n.id = seccomp_next_notify_id(match);
	init_completion(&n.ready);
	list_add_tail(&n.list, &match->notif->notifications);
	INIT_LIST_HEAD(&n.addfd);

	up(&match->notif->request);
	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);

	/*
	 * This is where we wait for a reply from userspace.
	 */
	do {
		bool wait_killable = should_sleep_killable(match, &n);

		mutex_unlock(&match->notify_lock);
		if (wait_killable)
			err = wait_for_completion_killable(&n.ready);
		else
			err = wait_for_completion_interruptible(&n.ready);
		mutex_lock(&match->notify_lock);

		if (err != 0) {
			/*
			 * Check to see if the notification got picked up and
			 * whether we should switch to wait killable.
			 */
			if (!wait_killable && should_sleep_killable(match, &n))
				continue;

			goto interrupted;
		}

		addfd = list_first_entry_or_null(&n.addfd,
						 struct seccomp_kaddfd, list);
		/* Check if we were woken up by an addfd message. */
		if (addfd)
			seccomp_handle_addfd(addfd, &n);

	} while (n.state != SECCOMP_NOTIFY_REPLIED);

	ret = n.val;
	err = n.error;
	flags = n.flags;

interrupted:
	/* If there were any pending addfd calls, clear them out. */
	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
		/* The process went away before we got a chance to handle it. */
		addfd->ret = -ESRCH;
		list_del_init(&addfd->list);
		complete(&addfd->completion);
	}

	/*
	 * Note that it's possible the listener died in between the time when
	 * we were notified of a response (or a signal) and when we were able
	 * to re-acquire the lock, so only delete from the list if the
	 * notification actually exists.
	 *
	 * Also note that this test is only valid because there's no way to
	 * *reattach* to a notifier right now. If one is added, we'll need to
	 * keep track of the notif itself and make sure they match here.
	 */
	if (match->notif)
		list_del(&n.list);
out:
	mutex_unlock(&match->notify_lock);

	/* Userspace requests to continue the syscall. */
	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return 0;

	syscall_set_return_value(current, current_pt_regs(),
				 err, ret);
	return -1;
}
1190
/*
 * Run the task's filter chain against @this_syscall and act on the
 * result.  Returns 0 if the syscall should proceed, -1 if it should be
 * skipped (return value already set, or the task is being killed).
 */
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	u32 filter_ret, action;
	struct seccomp_filter *match = NULL;
	int data;
	struct seccomp_data sd_local;

	/*
	 * Make sure that any changes to mode from another thread have
	 * been seen after SYSCALL_WORK_SECCOMP was seen.
	 */
	smp_rmb();

	if (!sd) {
		populate_seccomp_data(&sd_local);
		sd = &sd_local;
	}

	filter_ret = seccomp_run_filters(sd, &match);
	data = filter_ret & SECCOMP_RET_DATA;
	action = filter_ret & SECCOMP_RET_ACTION_FULL;

	switch (action) {
	case SECCOMP_RET_ERRNO:
		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
		if (data > MAX_ERRNO)
			data = MAX_ERRNO;
		syscall_set_return_value(current, current_pt_regs(),
					 -data, 0);
		goto skip;

	case SECCOMP_RET_TRAP:
		/* Show the handler the original registers. */
		syscall_rollback(current, current_pt_regs());
		/* Let the filter pass back 16 bits of data. */
		force_sig_seccomp(this_syscall, data, false);
		goto skip;

	case SECCOMP_RET_TRACE:
		/* We've been put in this state by the ptracer already. */
		if (recheck_after_trace)
			return 0;

		/* ENOSYS these calls if there is no tracer attached. */
		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
			syscall_set_return_value(current,
						 current_pt_regs(),
						 -ENOSYS, 0);
			goto skip;
		}

		/* Allow the BPF to provide the event message. */
		ptrace_event(PTRACE_EVENT_SECCOMP, data);
		/*
		 * The delivery of a fatal signal during event
		 * notification may silently skip tracer notification,
		 * which could leave us with a potentially unmodified
		 * syscall that the tracer would have liked to have
		 * changed. Since the process is about to die, we just
		 * force the syscall to be skipped and let the signal
		 * kill the process and correctly handle any tracer exit
		 * notifications.
		 */
		if (fatal_signal_pending(current))
			goto skip;
		/* Check if the tracer forced the syscall to be skipped. */
		this_syscall = syscall_get_nr(current, current_pt_regs());
		if (this_syscall < 0)
			goto skip;

		/*
		 * Recheck the syscall, since it may have changed. This
		 * intentionally uses a NULL struct seccomp_data to force
		 * a reload of all registers. This does not goto skip since
		 * a skip would have already been reported.
		 */
		if (__seccomp_filter(this_syscall, NULL, true))
			return -1;

		return 0;

	case SECCOMP_RET_USER_NOTIF:
		if (seccomp_do_user_notification(this_syscall, match, sd))
			goto skip;

		return 0;

	case SECCOMP_RET_LOG:
		seccomp_log(this_syscall, 0, action, true);
		return 0;

	case SECCOMP_RET_ALLOW:
		/*
		 * Note that the "match" filter will always be NULL for
		 * this action since SECCOMP_RET_ALLOW is the starting
		 * state in seccomp_run_filters().
		 */
		return 0;

	case SECCOMP_RET_KILL_THREAD:
	case SECCOMP_RET_KILL_PROCESS:
	default:
		/* Mark dead: surviving a KILL action must be impossible. */
		current->seccomp.mode = SECCOMP_MODE_DEAD;
		seccomp_log(this_syscall, SIGSYS, action, true);
		/* Dump core only if this is the last remaining thread. */
		if (action != SECCOMP_RET_KILL_THREAD ||
		    (atomic_read(&current->signal->live) == 1)) {
			/* Show the original registers in the dump. */
			syscall_rollback(current, current_pt_regs());
			/* Trigger a coredump with SIGSYS. */
			force_sig_seccomp(this_syscall, data, true);
		} else {
			do_exit(SIGSYS);
		}
		return -1; /* skip the syscall, go directly to signal handling */
	}

	unreachable();

skip:
	seccomp_log(this_syscall, 0, action, match ? match->log : false);
	return -1;
}
1315#else
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	/* SECCOMP_MODE_FILTER is unreachable without CONFIG_SECCOMP_FILTER. */
	BUG();

	return -1;
}
1323#endif
1324
/*
 * Syscall-entry hook: dispatch on the task's seccomp mode.
 * Returns 0 to proceed with the syscall, -1 to skip it.
 */
int __secure_computing(const struct seccomp_data *sd)
{
	int mode = current->seccomp.mode;
	int this_syscall;

	/* Checkpoint/restore may legitimately suspend seccomp via ptrace. */
	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return 0;

	this_syscall = sd ? sd->nr :
		syscall_get_nr(current, current_pt_regs());

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		__secure_computing_strict(this_syscall);  /* may call do_exit */
		return 0;
	case SECCOMP_MODE_FILTER:
		return __seccomp_filter(this_syscall, sd, false);
	/* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
	case SECCOMP_MODE_DEAD:
		WARN_ON_ONCE(1);
		do_exit(SIGKILL);
		return -1;
	default:
		BUG();
	}
}
1352#endif
1353
/* Return the calling task's current seccomp mode (SECCOMP_MODE_*). */
long prctl_get_seccomp(void)
{
	return current->seccomp.mode;
}
1358
1359
1360
1361
1362
1363
1364
1365
/**
 * seccomp_set_mode_strict: internal function for setting strict seccomp
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_strict(void)
{
	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
	long ret = -EINVAL;

	spin_lock_irq(&current->sighand->siglock);

	if (!seccomp_may_assign_mode(seccomp_mode))
		goto out;

#ifdef TIF_NOTSC
	/* Strict mode also disables direct TSC reads on arches with TIF_NOTSC. */
	disable_TSC();
#endif
	seccomp_assign_mode(current, seccomp_mode, 0);
	ret = 0;

out:
	spin_unlock_irq(&current->sighand->siglock);

	return ret;
}
1387
1388#ifdef CONFIG_SECCOMP_FILTER
1389static void seccomp_notify_free(struct seccomp_filter *filter)
1390{
1391 kfree(filter->notif);
1392 filter->notif = NULL;
1393}
1394
/*
 * Detach the listener from @filter: answer every outstanding
 * notification with -ENOSYS and free the notification state.
 */
static void seccomp_notify_detach(struct seccomp_filter *filter)
{
	struct seccomp_knotif *knotif;

	if (!filter)
		return;

	mutex_lock(&filter->notify_lock);

	/*
	 * If this file is being closed because e.g. the task who owned it
	 * died, let's wake everyone up who was waiting on us.
	 */
	list_for_each_entry(knotif, &filter->notif->notifications, list) {
		if (knotif->state == SECCOMP_NOTIFY_REPLIED)
			continue;

		knotif->state = SECCOMP_NOTIFY_REPLIED;
		knotif->error = -ENOSYS;
		knotif->val = 0;

		/*
		 * We do not need to wake up any pending addfd messages, as
		 * the notifier will do that for us, as this just looks
		 * like a standard reply.
		 */
		complete(&knotif->ready);
	}

	seccomp_notify_free(filter);
	mutex_unlock(&filter->notify_lock);
}
1427
/* fops release for the listener fd: detach, then drop the filter ref. */
static int seccomp_notify_release(struct inode *inode, struct file *file)
{
	struct seccomp_filter *filter = file->private_data;

	seccomp_notify_detach(filter);
	__put_seccomp_filter(filter);
	return 0;
}
1436
1437
1438static inline struct seccomp_knotif *
1439find_notification(struct seccomp_filter *filter, u64 id)
1440{
1441 struct seccomp_knotif *cur;
1442
1443 lockdep_assert_held(&filter->notify_lock);
1444
1445 list_for_each_entry(cur, &filter->notif->notifications, list) {
1446 if (cur->id == id)
1447 return cur;
1448 }
1449
1450 return NULL;
1451}
1452
1453
/*
 * SECCOMP_IOCTL_NOTIF_RECV: hand the oldest pending (INIT) notification
 * to the supervisor, marking it SENT.  On copy_to_user() failure the
 * notification is rolled back to INIT so it can be fetched again.
 */
static long seccomp_notify_recv(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_knotif *knotif = NULL, *cur;
	struct seccomp_notif unotif;
	ssize_t ret;

	/* Verify that we're not given garbage to keep struct extensible. */
	ret = check_zeroed_user(buf, sizeof(unotif));
	if (ret < 0)
		return ret;
	if (!ret)
		return -EINVAL;

	memset(&unotif, 0, sizeof(unotif));

	ret = down_interruptible(&filter->notif->request);
	if (ret < 0)
		return ret;

	mutex_lock(&filter->notify_lock);
	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->state == SECCOMP_NOTIFY_INIT) {
			knotif = cur;
			break;
		}
	}

	/*
	 * If we didn't find a notification, it could be that the task was
	 * interrupted by a fatal signal between the time we were woken and
	 * when we were able to acquire the rw lock.
	 */
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	unotif.id = knotif->id;
	unotif.pid = task_pid_vnr(knotif->task);
	unotif.data = *(knotif->data);

	knotif->state = SECCOMP_NOTIFY_SENT;
	wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
	ret = 0;
out:
	mutex_unlock(&filter->notify_lock);

	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
		ret = -EFAULT;

		/*
		 * Userspace screwed up. To make sure that we keep this
		 * notification alive, let's reset it back to INIT. It
		 * may have died when we released the lock, so we need to make
		 * sure it's still around.
		 */
		mutex_lock(&filter->notify_lock);
		knotif = find_notification(filter, unotif.id);
		if (knotif) {
			/* Reset the process to make sure it's not stuck. */
			if (should_sleep_killable(filter, knotif))
				complete(&knotif->ready);
			knotif->state = SECCOMP_NOTIFY_INIT;
			up(&filter->notif->request);
		}
		mutex_unlock(&filter->notify_lock);
	}

	return ret;
}
1525
/*
 * SECCOMP_IOCTL_NOTIF_SEND: deliver the supervisor's reply for a SENT
 * notification and wake the blocked target task.
 */
static long seccomp_notify_send(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_notif_resp resp = {};
	struct seccomp_knotif *knotif;
	long ret;

	if (copy_from_user(&resp, buf, sizeof(resp)))
		return -EFAULT;

	if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return -EINVAL;

	/* A CONTINUE reply must not also carry an error or value. */
	if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
	    (resp.error || resp.val))
		return -EINVAL;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, resp.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	/* Allow exactly one reply. */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out;
	}

	ret = 0;
	knotif->state = SECCOMP_NOTIFY_REPLIED;
	knotif->error = resp.error;
	knotif->val = resp.val;
	knotif->flags = resp.flags;
	/* Wake the target waiting in seccomp_do_user_notification(). */
	complete(&knotif->ready);
out:
	mutex_unlock(&filter->notify_lock);
	return ret;
}
1569
/*
 * SECCOMP_IOCTL_NOTIF_ID_VALID: return 0 if @buf's id refers to a
 * notification currently in the SENT state, -ENOENT otherwise — lets
 * the supervisor verify the target task is still blocked on it.
 */
static long seccomp_notify_id_valid(struct seccomp_filter *filter,
				    void __user *buf)
{
	struct seccomp_knotif *knotif;
	u64 id;
	long ret;

	if (copy_from_user(&id, buf, sizeof(id)))
		return -EFAULT;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, id);
	if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
		ret = 0;
	else
		ret = -ENOENT;

	mutex_unlock(&filter->notify_lock);
	return ret;
}
1593
/*
 * SECCOMP_IOCTL_NOTIF_ADDFD: queue a request to install a file
 * descriptor into the target task and wait for it to be handled.
 * Returns the installed fd number on success or a negative errno.
 */
static long seccomp_notify_addfd(struct seccomp_filter *filter,
				 struct seccomp_notif_addfd __user *uaddfd,
				 unsigned int size)
{
	struct seccomp_notif_addfd addfd;
	struct seccomp_knotif *knotif;
	struct seccomp_kaddfd kaddfd;
	int ret;

	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);

	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
		return -EINVAL;

	/* Forward-compatible copy: trailing unknown bytes must be zero. */
	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
	if (ret)
		return ret;

	if (addfd.newfd_flags & ~O_CLOEXEC)
		return -EINVAL;

	if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
		return -EINVAL;

	/* A specific newfd only makes sense together with SETFD. */
	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
		return -EINVAL;

	kaddfd.file = fget(addfd.srcfd);
	if (!kaddfd.file)
		return -EBADF;

	kaddfd.ioctl_flags = addfd.flags;
	kaddfd.flags = addfd.newfd_flags;
	kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
	kaddfd.fd = addfd.newfd;
	init_completion(&kaddfd.completion);

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		goto out;

	knotif = find_notification(filter, addfd.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out_unlock;
	}

	/*
	 * We do not want to allow addfds to occur before or after the reply,
	 * to avoid allowing an fd to be inserted in an unexpected place.
	 */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out_unlock;
	}

	if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
		/*
		 * Disallow queuing an atomic addfd + send reply while there
		 * are some addfd requests still to process.
		 *
		 * There is no clear reason to support it and it allows us to
		 * keep the loop on the other side straight-forward.
		 */
		if (!list_empty(&knotif->addfd)) {
			ret = -EBUSY;
			goto out_unlock;
		}

		/* Allow exactly only one reply. */
		knotif->state = SECCOMP_NOTIFY_REPLIED;
	}

	list_add(&kaddfd.list, &knotif->addfd);
	complete(&knotif->ready);
	mutex_unlock(&filter->notify_lock);

	/* Now we wait for it to be processed or be interrupted. */
	ret = wait_for_completion_interruptible(&kaddfd.completion);
	if (ret == 0) {
		/*
		 * We had a successful completion. The other side has already
		 * removed us from the addfd queue, and
		 * wait_for_completion_interruptible has a memory barrier upon
		 * success that lets us read this value directly without
		 * locking.
		 */
		ret = kaddfd.ret;
		goto out;
	}

	mutex_lock(&filter->notify_lock);
	/*
	 * Even though we were woken up by a signal and not a successful
	 * completion, a completion may have happened in the mean time.
	 *
	 * We need to check again if the addfd request has been handled,
	 * and if not, we will remove it from the queue.
	 */
	if (list_empty(&kaddfd.list))
		ret = kaddfd.ret;
	else
		list_del(&kaddfd.list);

out_unlock:
	mutex_unlock(&filter->notify_lock);
out:
	fput(kaddfd.file);

	return ret;
}
1707
1708static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
1709 unsigned long arg)
1710{
1711 struct seccomp_filter *filter = file->private_data;
1712 void __user *buf = (void __user *)arg;
1713
1714
1715 switch (cmd) {
1716 case SECCOMP_IOCTL_NOTIF_RECV:
1717 return seccomp_notify_recv(filter, buf);
1718 case SECCOMP_IOCTL_NOTIF_SEND:
1719 return seccomp_notify_send(filter, buf);
1720 case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
1721 case SECCOMP_IOCTL_NOTIF_ID_VALID:
1722 return seccomp_notify_id_valid(filter, buf);
1723 }
1724
1725
1726#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
1727 switch (EA_IOCTL(cmd)) {
1728 case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
1729 return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
1730 default:
1731 return -EINVAL;
1732 }
1733}
1734
1735static __poll_t seccomp_notify_poll(struct file *file,
1736 struct poll_table_struct *poll_tab)
1737{
1738 struct seccomp_filter *filter = file->private_data;
1739 __poll_t ret = 0;
1740 struct seccomp_knotif *cur;
1741
1742 poll_wait(file, &filter->wqh, poll_tab);
1743
1744 if (mutex_lock_interruptible(&filter->notify_lock) < 0)
1745 return EPOLLERR;
1746
1747 list_for_each_entry(cur, &filter->notif->notifications, list) {
1748 if (cur->state == SECCOMP_NOTIFY_INIT)
1749 ret |= EPOLLIN | EPOLLRDNORM;
1750 if (cur->state == SECCOMP_NOTIFY_SENT)
1751 ret |= EPOLLOUT | EPOLLWRNORM;
1752 if ((ret & EPOLLIN) && (ret & EPOLLOUT))
1753 break;
1754 }
1755
1756 mutex_unlock(&filter->notify_lock);
1757
1758 if (refcount_read(&filter->users) == 0)
1759 ret |= EPOLLHUP;
1760
1761 return ret;
1762}
1763
/* File operations backing the user_notif listener fd. */
static const struct file_operations seccomp_notify_ops = {
	.poll = seccomp_notify_poll,
	.release = seccomp_notify_release,
	.unlocked_ioctl = seccomp_notify_ioctl,
	/* Compat tasks reuse the native ioctl handler. */
	.compat_ioctl = seccomp_notify_ioctl,
};
1770
1771static struct file *init_listener(struct seccomp_filter *filter)
1772{
1773 struct file *ret;
1774
1775 ret = ERR_PTR(-ENOMEM);
1776 filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
1777 if (!filter->notif)
1778 goto out;
1779
1780 sema_init(&filter->notif->request, 0);
1781 filter->notif->next_id = get_random_u64();
1782 INIT_LIST_HEAD(&filter->notif->notifications);
1783
1784 ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
1785 filter, O_RDWR);
1786 if (IS_ERR(ret))
1787 goto out_notif;
1788
1789
1790 __get_seccomp_filter(filter);
1791
1792out_notif:
1793 if (IS_ERR(ret))
1794 seccomp_notify_free(filter);
1795out:
1796 return ret;
1797}
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807static bool has_duplicate_listener(struct seccomp_filter *new_child)
1808{
1809 struct seccomp_filter *cur;
1810
1811
1812 lockdep_assert_held(¤t->sighand->siglock);
1813
1814 if (!new_child->notif)
1815 return false;
1816 for (cur = current->seccomp.filter; cur; cur = cur->prev) {
1817 if (cur->notif)
1818 return true;
1819 }
1820
1821 return false;
1822}
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837static long seccomp_set_mode_filter(unsigned int flags,
1838 const char __user *filter)
1839{
1840 const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
1841 struct seccomp_filter *prepared = NULL;
1842 long ret = -EINVAL;
1843 int listener = -1;
1844 struct file *listener_f = NULL;
1845
1846
1847 if (flags & ~SECCOMP_FILTER_FLAG_MASK)
1848 return -EINVAL;
1849
1850
1851
1852
1853
1854
1855
1856
1857 if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
1858 (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
1859 ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
1860 return -EINVAL;
1861
1862
1863
1864
1865
1866 if ((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) &&
1867 ((flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) == 0))
1868 return -EINVAL;
1869
1870
1871 prepared = seccomp_prepare_user_filter(filter);
1872 if (IS_ERR(prepared))
1873 return PTR_ERR(prepared);
1874
1875 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1876 listener = get_unused_fd_flags(O_CLOEXEC);
1877 if (listener < 0) {
1878 ret = listener;
1879 goto out_free;
1880 }
1881
1882 listener_f = init_listener(prepared);
1883 if (IS_ERR(listener_f)) {
1884 put_unused_fd(listener);
1885 ret = PTR_ERR(listener_f);
1886 goto out_free;
1887 }
1888 }
1889
1890
1891
1892
1893
1894 if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
1895 mutex_lock_killable(¤t->signal->cred_guard_mutex))
1896 goto out_put_fd;
1897
1898 spin_lock_irq(¤t->sighand->siglock);
1899
1900 if (!seccomp_may_assign_mode(seccomp_mode))
1901 goto out;
1902
1903 if (has_duplicate_listener(prepared)) {
1904 ret = -EBUSY;
1905 goto out;
1906 }
1907
1908 ret = seccomp_attach_filter(flags, prepared);
1909 if (ret)
1910 goto out;
1911
1912 prepared = NULL;
1913
1914 seccomp_assign_mode(current, seccomp_mode, flags);
1915out:
1916 spin_unlock_irq(¤t->sighand->siglock);
1917 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
1918 mutex_unlock(¤t->signal->cred_guard_mutex);
1919out_put_fd:
1920 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1921 if (ret) {
1922 listener_f->private_data = NULL;
1923 fput(listener_f);
1924 put_unused_fd(listener);
1925 seccomp_notify_detach(prepared);
1926 } else {
1927 fd_install(listener, listener_f);
1928 ret = listener;
1929 }
1930 }
1931out_free:
1932 seccomp_filter_free(prepared);
1933 return ret;
1934}
1935#else
/* CONFIG_SECCOMP_FILTER=n stub: filter mode is not supported. */
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
1941#endif
1942
1943static long seccomp_get_action_avail(const char __user *uaction)
1944{
1945 u32 action;
1946
1947 if (copy_from_user(&action, uaction, sizeof(action)))
1948 return -EFAULT;
1949
1950 switch (action) {
1951 case SECCOMP_RET_KILL_PROCESS:
1952 case SECCOMP_RET_KILL_THREAD:
1953 case SECCOMP_RET_TRAP:
1954 case SECCOMP_RET_ERRNO:
1955 case SECCOMP_RET_USER_NOTIF:
1956 case SECCOMP_RET_TRACE:
1957 case SECCOMP_RET_LOG:
1958 case SECCOMP_RET_ALLOW:
1959 break;
1960 default:
1961 return -EOPNOTSUPP;
1962 }
1963
1964 return 0;
1965}
1966
1967static long seccomp_get_notif_sizes(void __user *usizes)
1968{
1969 struct seccomp_notif_sizes sizes = {
1970 .seccomp_notif = sizeof(struct seccomp_notif),
1971 .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
1972 .seccomp_data = sizeof(struct seccomp_data),
1973 };
1974
1975 if (copy_to_user(usizes, &sizes, sizeof(sizes)))
1976 return -EFAULT;
1977
1978 return 0;
1979}
1980
1981
1982static long do_seccomp(unsigned int op, unsigned int flags,
1983 void __user *uargs)
1984{
1985 switch (op) {
1986 case SECCOMP_SET_MODE_STRICT:
1987 if (flags != 0 || uargs != NULL)
1988 return -EINVAL;
1989 return seccomp_set_mode_strict();
1990 case SECCOMP_SET_MODE_FILTER:
1991 return seccomp_set_mode_filter(flags, uargs);
1992 case SECCOMP_GET_ACTION_AVAIL:
1993 if (flags != 0)
1994 return -EINVAL;
1995
1996 return seccomp_get_action_avail(uargs);
1997 case SECCOMP_GET_NOTIF_SIZES:
1998 if (flags != 0)
1999 return -EINVAL;
2000
2001 return seccomp_get_notif_sizes(uargs);
2002 default:
2003 return -EINVAL;
2004 }
2005}
2006
/* seccomp(2): thin wrapper around do_seccomp(). */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
			 void __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
2012
2013
2014
2015
2016
2017
2018
2019
2020long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
2021{
2022 unsigned int op;
2023 void __user *uargs;
2024
2025 switch (seccomp_mode) {
2026 case SECCOMP_MODE_STRICT:
2027 op = SECCOMP_SET_MODE_STRICT;
2028
2029
2030
2031
2032
2033 uargs = NULL;
2034 break;
2035 case SECCOMP_MODE_FILTER:
2036 op = SECCOMP_SET_MODE_FILTER;
2037 uargs = filter;
2038 break;
2039 default:
2040 return -EINVAL;
2041 }
2042
2043
2044 return do_seccomp(op, 0, uargs);
2045}
2046
2047#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
/*
 * get_nth_filter - fetch the filter at @filter_off on @task's chain
 *
 * Per the walk below, offset 0 refers to the oldest filter in the chain
 * and offset (count - 1) to the most recently attached one.
 *
 * Takes a temporary reference on the chain head so the chain stays alive
 * while being walked without holding siglock, then a reference on the
 * selected filter which the caller must drop with __put_seccomp_filter().
 *
 * Returns the filter, or ERR_PTR(-EINVAL) if @task is not in filter
 * mode, or ERR_PTR(-ENOENT) if @filter_off is out of range.
 */
static struct seccomp_filter *get_nth_filter(struct task_struct *task,
					     unsigned long filter_off)
{
	struct seccomp_filter *orig, *filter;
	unsigned long count;

	/* Pin the chain head under siglock so it cannot be detached. */
	spin_lock_irq(&task->sighand->siglock);

	if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
		spin_unlock_irq(&task->sighand->siglock);
		return ERR_PTR(-EINVAL);
	}

	orig = task->seccomp.filter;
	__get_seccomp_filter(orig);
	spin_unlock_irq(&task->sighand->siglock);

	/* Chain length, walking newest to oldest. */
	count = 0;
	for (filter = orig; filter; filter = filter->prev)
		count++;

	if (filter_off >= count) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	/* Walk (count - filter_off - 1) links back from the newest. */
	count -= filter_off;
	for (filter = orig; filter && count > 1; filter = filter->prev)
		count--;

	if (WARN_ON(count != 1 || !filter)) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	/* Reference handed to the caller. */
	__get_seccomp_filter(filter);

out:
	/* Drop the temporary reference on the chain head. */
	__put_seccomp_filter(orig);
	return filter;
}
2093
2094long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
2095 void __user *data)
2096{
2097 struct seccomp_filter *filter;
2098 struct sock_fprog_kern *fprog;
2099 long ret;
2100
2101 if (!capable(CAP_SYS_ADMIN) ||
2102 current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2103 return -EACCES;
2104 }
2105
2106 filter = get_nth_filter(task, filter_off);
2107 if (IS_ERR(filter))
2108 return PTR_ERR(filter);
2109
2110 fprog = filter->prog->orig_prog;
2111 if (!fprog) {
2112
2113
2114
2115
2116 ret = -EMEDIUMTYPE;
2117 goto out;
2118 }
2119
2120 ret = fprog->len;
2121 if (!data)
2122 goto out;
2123
2124 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
2125 ret = -EFAULT;
2126
2127out:
2128 __put_seccomp_filter(filter);
2129 return ret;
2130}
2131
2132long seccomp_get_metadata(struct task_struct *task,
2133 unsigned long size, void __user *data)
2134{
2135 long ret;
2136 struct seccomp_filter *filter;
2137 struct seccomp_metadata kmd = {};
2138
2139 if (!capable(CAP_SYS_ADMIN) ||
2140 current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2141 return -EACCES;
2142 }
2143
2144 size = min_t(unsigned long, size, sizeof(kmd));
2145
2146 if (size < sizeof(kmd.filter_off))
2147 return -EINVAL;
2148
2149 if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
2150 return -EFAULT;
2151
2152 filter = get_nth_filter(task, kmd.filter_off);
2153 if (IS_ERR(filter))
2154 return PTR_ERR(filter);
2155
2156 if (filter->log)
2157 kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
2158
2159 ret = size;
2160 if (copy_to_user(data, &kmd, size))
2161 ret = -EFAULT;
2162
2163 __put_seccomp_filter(filter);
2164 return ret;
2165}
2166#endif
2167
2168#ifdef CONFIG_SYSCTL
2169
2170
2171#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process"
2172#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
2173#define SECCOMP_RET_TRAP_NAME "trap"
2174#define SECCOMP_RET_ERRNO_NAME "errno"
2175#define SECCOMP_RET_USER_NOTIF_NAME "user_notif"
2176#define SECCOMP_RET_TRACE_NAME "trace"
2177#define SECCOMP_RET_LOG_NAME "log"
2178#define SECCOMP_RET_ALLOW_NAME "allow"
2179
2180static const char seccomp_actions_avail[] =
2181 SECCOMP_RET_KILL_PROCESS_NAME " "
2182 SECCOMP_RET_KILL_THREAD_NAME " "
2183 SECCOMP_RET_TRAP_NAME " "
2184 SECCOMP_RET_ERRNO_NAME " "
2185 SECCOMP_RET_USER_NOTIF_NAME " "
2186 SECCOMP_RET_TRACE_NAME " "
2187 SECCOMP_RET_LOG_NAME " "
2188 SECCOMP_RET_ALLOW_NAME;
2189
2190struct seccomp_log_name {
2191 u32 log;
2192 const char *name;
2193};
2194
2195static const struct seccomp_log_name seccomp_log_names[] = {
2196 { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
2197 { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
2198 { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
2199 { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
2200 { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
2201 { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
2202 { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
2203 { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
2204 { }
2205};
2206
2207static bool seccomp_names_from_actions_logged(char *names, size_t size,
2208 u32 actions_logged,
2209 const char *sep)
2210{
2211 const struct seccomp_log_name *cur;
2212 bool append_sep = false;
2213
2214 for (cur = seccomp_log_names; cur->name && size; cur++) {
2215 ssize_t ret;
2216
2217 if (!(actions_logged & cur->log))
2218 continue;
2219
2220 if (append_sep) {
2221 ret = strscpy(names, sep, size);
2222 if (ret < 0)
2223 return false;
2224
2225 names += ret;
2226 size -= ret;
2227 } else
2228 append_sep = true;
2229
2230 ret = strscpy(names, cur->name, size);
2231 if (ret < 0)
2232 return false;
2233
2234 names += ret;
2235 size -= ret;
2236 }
2237
2238 return true;
2239}
2240
2241static bool seccomp_action_logged_from_name(u32 *action_logged,
2242 const char *name)
2243{
2244 const struct seccomp_log_name *cur;
2245
2246 for (cur = seccomp_log_names; cur->name; cur++) {
2247 if (!strcmp(cur->name, name)) {
2248 *action_logged = cur->log;
2249 return true;
2250 }
2251 }
2252
2253 return false;
2254}
2255
2256static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
2257{
2258 char *name;
2259
2260 *actions_logged = 0;
2261 while ((name = strsep(&names, " ")) && *name) {
2262 u32 action_logged = 0;
2263
2264 if (!seccomp_action_logged_from_name(&action_logged, name))
2265 return false;
2266
2267 *actions_logged |= action_logged;
2268 }
2269
2270 return true;
2271}
2272
/*
 * Handle reads of the actions_logged sysctl: format the current
 * seccomp_actions_logged mask as a space-separated name list and hand
 * it to proc_dostring() via a stack-local ctl_table copy.
 */
static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
			       size_t *lenp, loff_t *ppos)
{
	/* seccomp_actions_avail is an upper bound on the output length. */
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;

	memset(names, 0, sizeof(names));

	if (!seccomp_names_from_actions_logged(names, sizeof(names),
					       seccomp_actions_logged, " "))
		return -EINVAL;

	/* Copy the read-only table so we can point .data at our buffer. */
	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	return proc_dostring(&table, 0, buffer, lenp, ppos);
}
2290
/*
 * Handle writes to the actions_logged sysctl: parse the space-separated
 * name list from userspace and update seccomp_actions_logged. The new
 * mask is also returned through @actions_logged for audit reporting.
 */
static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
				size_t *lenp, loff_t *ppos, u32 *actions_logged)
{
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	memset(names, 0, sizeof(names));

	/* Copy the read-only table so we can point .data at our buffer. */
	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	ret = proc_dostring(&table, 1, buffer, lenp, ppos);
	if (ret)
		return ret;

	if (!seccomp_actions_logged_from_names(actions_logged, table.data))
		return -EINVAL;

	/* "allow" must never be logged; reject attempts to enable it. */
	if (*actions_logged & SECCOMP_LOG_ALLOW)
		return -EINVAL;

	seccomp_actions_logged = *actions_logged;
	return 0;
}
2319
/*
 * Emit an audit record describing a change (or failed change) to the
 * actions_logged sysctl, rendering both the new and old masks as
 * comma-separated name lists. "?" marks a mask that could not be
 * formatted (or a failed write); "(none)" marks an empty mask.
 */
static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
				 int ret)
{
	char names[sizeof(seccomp_actions_avail)];
	char old_names[sizeof(seccomp_actions_avail)];
	const char *new = names;
	const char *old = old_names;

	if (!audit_enabled)
		return;

	memset(names, 0, sizeof(names));
	memset(old_names, 0, sizeof(old_names));

	if (ret)
		new = "?";
	else if (!actions_logged)
		new = "(none)";
	else if (!seccomp_names_from_actions_logged(names, sizeof(names),
						    actions_logged, ","))
		new = "?";

	if (!old_actions_logged)
		old = "(none)";
	else if (!seccomp_names_from_actions_logged(old_names,
						    sizeof(old_names),
						    old_actions_logged, ","))
		old = "?";

	/* !ret: the write succeeded. */
	return audit_seccomp_actions_logged(new, old, !ret);
}
2351
2352static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
2353 void *buffer, size_t *lenp,
2354 loff_t *ppos)
2355{
2356 int ret;
2357
2358 if (write) {
2359 u32 actions_logged = 0;
2360 u32 old_actions_logged = seccomp_actions_logged;
2361
2362 ret = write_actions_logged(ro_table, buffer, lenp, ppos,
2363 &actions_logged);
2364 audit_actions_logged(actions_logged, old_actions_logged, ret);
2365 } else
2366 ret = read_actions_logged(ro_table, buffer, lenp, ppos);
2367
2368 return ret;
2369}
2370
/* Sysctl directory for the entries below: kernel.seccomp.* */
static struct ctl_path seccomp_sysctl_path[] = {
	{ .procname = "kernel", },
	{ .procname = "seccomp", },
	{ }
};
2376
/* Entries under kernel.seccomp.* */
static struct ctl_table seccomp_sysctl_table[] = {
	{
		/* Read-only list of every action this kernel supports. */
		.procname = "actions_avail",
		.data = (void *) &seccomp_actions_avail,
		.maxlen = sizeof(seccomp_actions_avail),
		.mode = 0444,
		.proc_handler = proc_dostring,
	},
	{
		/* Read/write mask of actions that get logged. */
		.procname = "actions_logged",
		.mode = 0644,
		.proc_handler = seccomp_actions_logged_handler,
	},
	{ }
};
2392
2393static int __init seccomp_sysctl_init(void)
2394{
2395 struct ctl_table_header *hdr;
2396
2397 hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
2398 if (!hdr)
2399 pr_warn("sysctl registration failed\n");
2400 else
2401 kmemleak_not_leak(hdr);
2402
2403 return 0;
2404}
2405
/* Register the seccomp sysctls at device-initcall time. */
device_initcall(seccomp_sysctl_init)
2407
2408#endif
2409
2410#ifdef CONFIG_SECCOMP_CACHE_DEBUG
2411
2412static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
2413 const void *bitmap, size_t bitmap_size)
2414{
2415 int nr;
2416
2417 for (nr = 0; nr < bitmap_size; nr++) {
2418 bool cached = test_bit(nr, bitmap);
2419 char *status = cached ? "ALLOW" : "FILTER";
2420
2421 seq_printf(m, "%s %d %s\n", name, nr, status);
2422 }
2423}
2424
/*
 * proc_pid_seccomp_cache - show /proc/<pid>/seccomp_cache
 *
 * Dumps the newest filter's per-arch action cache. Requires
 * CAP_SYS_ADMIN in the initial user namespace, checked against the
 * credentials the proc file was opened with.
 */
int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{
	struct seccomp_filter *f;
	unsigned long flags;

	/*
	 * Use the opener's capabilities (file_ns_capable) rather than the
	 * current task's, so a privileged fd passed to an unprivileged
	 * reader does not change the access decision.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	if (!lock_task_sighand(task, &flags))
		return -ESRCH;

	f = READ_ONCE(task->seccomp.filter);
	if (!f) {
		unlock_task_sighand(task, &flags);
		return 0;
	}

	/* Pin the filter so we can drop siglock while printing. */
	__get_seccomp_filter(f);
	unlock_task_sighand(task, &flags);

	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
				    f->cache.allow_native,
				    SECCOMP_ARCH_NATIVE_NR);

#ifdef SECCOMP_ARCH_COMPAT
	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
				    f->cache.allow_compat,
				    SECCOMP_ARCH_COMPAT_NR);
#endif

	__put_seccomp_filter(f);
	return 0;
}
2464#endif
2465