1
2
3
4
5
6
7
8
9
10#include <linux/capability.h>
11#include <linux/audit.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/kernel.h>
15#include <linux/security.h>
16#include <linux/file.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/pagemap.h>
20#include <linux/swap.h>
21#include <linux/skbuff.h>
22#include <linux/netlink.h>
23#include <linux/ptrace.h>
24#include <linux/xattr.h>
25#include <linux/hugetlb.h>
26#include <linux/mount.h>
27#include <linux/sched.h>
28#include <linux/prctl.h>
29#include <linux/securebits.h>
30#include <linux/user_namespace.h>
31
32
33
34
35
36
37
38
39
40
41
42
43static void warn_setuid_and_fcaps_mixed(const char *fname)
44{
45 static int warned;
46 if (!warned) {
47 printk(KERN_INFO "warning: `%s' has both setuid-root and"
48 " effective capabilities. Therefore not raising all"
49 " capabilities.\n", fname);
50 warned = 1;
51 }
52}
53
/**
 * cap_netlink_send - Capability hook for sending a netlink message
 * @sk: The sending socket (not examined)
 * @skb: The message buffer (not examined)
 *
 * This implementation imposes no restriction: it ignores both arguments
 * and always returns 0.
 */
int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
{
	(void)sk;
	(void)skb;

	return 0;
}
58
59int cap_netlink_recv(struct sk_buff *skb, int cap)
60{
61 if (!cap_raised(current_cap(), cap))
62 return -EPERM;
63 return 0;
64}
65EXPORT_SYMBOL(cap_netlink_recv);
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83int cap_capable(struct task_struct *tsk, const struct cred *cred,
84 struct user_namespace *targ_ns, int cap, int audit)
85{
86 for (;;) {
87
88 if (targ_ns != &init_user_ns && targ_ns->creator == cred->user)
89 return 0;
90
91
92 if (targ_ns == cred->user->user_ns)
93 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
94
95
96 if (targ_ns == &init_user_ns)
97 return -EPERM;
98
99
100
101
102
103 targ_ns = targ_ns->creator->user_ns;
104 }
105
106
107}
108
109
110
111
112
113
114
115
116
117int cap_settime(const struct timespec *ts, const struct timezone *tz)
118{
119 if (!capable(CAP_SYS_TIME))
120 return -EPERM;
121 return 0;
122}
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
140{
141 int ret = 0;
142 const struct cred *cred, *child_cred;
143
144 rcu_read_lock();
145 cred = current_cred();
146 child_cred = __task_cred(child);
147 if (cred->user->user_ns == child_cred->user->user_ns &&
148 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
149 goto out;
150 if (ns_capable(child_cred->user->user_ns, CAP_SYS_PTRACE))
151 goto out;
152 ret = -EPERM;
153out:
154 rcu_read_unlock();
155 return ret;
156}
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171int cap_ptrace_traceme(struct task_struct *parent)
172{
173 int ret = 0;
174 const struct cred *cred, *child_cred;
175
176 rcu_read_lock();
177 cred = __task_cred(parent);
178 child_cred = current_cred();
179 if (cred->user->user_ns == child_cred->user->user_ns &&
180 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
181 goto out;
182 if (has_ns_capability(parent, child_cred->user->user_ns, CAP_SYS_PTRACE))
183 goto out;
184 ret = -EPERM;
185out:
186 rcu_read_unlock();
187 return ret;
188}
189
190
191
192
193
194
195
196
197
198
199
200int cap_capget(struct task_struct *target, kernel_cap_t *effective,
201 kernel_cap_t *inheritable, kernel_cap_t *permitted)
202{
203 const struct cred *cred;
204
205
206 rcu_read_lock();
207 cred = __task_cred(target);
208 *effective = cred->cap_effective;
209 *inheritable = cred->cap_inheritable;
210 *permitted = cred->cap_permitted;
211 rcu_read_unlock();
212 return 0;
213}
214
215
216
217
218
219static inline int cap_inh_is_capped(void)
220{
221
222
223
224
225 if (cap_capable(current, current_cred(),
226 current_cred()->user->user_ns, CAP_SETPCAP,
227 SECURITY_CAP_AUDIT) == 0)
228 return 0;
229 return 1;
230}
231
232
233
234
235
236
237
238
239
240
241
242
243
244int cap_capset(struct cred *new,
245 const struct cred *old,
246 const kernel_cap_t *effective,
247 const kernel_cap_t *inheritable,
248 const kernel_cap_t *permitted)
249{
250 if (cap_inh_is_capped() &&
251 !cap_issubset(*inheritable,
252 cap_combine(old->cap_inheritable,
253 old->cap_permitted)))
254
255 return -EPERM;
256
257 if (!cap_issubset(*inheritable,
258 cap_combine(old->cap_inheritable,
259 old->cap_bset)))
260
261 return -EPERM;
262
263
264 if (!cap_issubset(*permitted, old->cap_permitted))
265 return -EPERM;
266
267
268 if (!cap_issubset(*effective, *permitted))
269 return -EPERM;
270
271 new->cap_effective = *effective;
272 new->cap_inheritable = *inheritable;
273 new->cap_permitted = *permitted;
274 return 0;
275}
276
277
278
279
280static inline void bprm_clear_caps(struct linux_binprm *bprm)
281{
282 cap_clear(bprm->cred->cap_permitted);
283 bprm->cap_effective = false;
284}
285
286
287
288
289
290
291
292
293
294
295
296
297int cap_inode_need_killpriv(struct dentry *dentry)
298{
299 struct inode *inode = dentry->d_inode;
300 int error;
301
302 if (!inode->i_op->getxattr)
303 return 0;
304
305 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0);
306 if (error <= 0)
307 return 0;
308 return 1;
309}
310
311
312
313
314
315
316
317
318
319int cap_inode_killpriv(struct dentry *dentry)
320{
321 struct inode *inode = dentry->d_inode;
322
323 if (!inode->i_op->removexattr)
324 return 0;
325
326 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS);
327}
328
329
330
331
332
333static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
334 struct linux_binprm *bprm,
335 bool *effective,
336 bool *has_cap)
337{
338 struct cred *new = bprm->cred;
339 unsigned i;
340 int ret = 0;
341
342 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
343 *effective = true;
344
345 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
346 *has_cap = true;
347
348 CAP_FOR_EACH_U32(i) {
349 __u32 permitted = caps->permitted.cap[i];
350 __u32 inheritable = caps->inheritable.cap[i];
351
352
353
354
355 new->cap_permitted.cap[i] =
356 (new->cap_bset.cap[i] & permitted) |
357 (new->cap_inheritable.cap[i] & inheritable);
358
359 if (permitted & ~new->cap_permitted.cap[i])
360
361 ret = -EPERM;
362 }
363
364
365
366
367
368
369 return *effective ? ret : 0;
370}
371
372
373
374
375int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
376{
377 struct inode *inode = dentry->d_inode;
378 __u32 magic_etc;
379 unsigned tocopy, i;
380 int size;
381 struct vfs_cap_data caps;
382
383 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
384
385 if (!inode || !inode->i_op->getxattr)
386 return -ENODATA;
387
388 size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps,
389 XATTR_CAPS_SZ);
390 if (size == -ENODATA || size == -EOPNOTSUPP)
391
392 return -ENODATA;
393 if (size < 0)
394 return size;
395
396 if (size < sizeof(magic_etc))
397 return -EINVAL;
398
399 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
400
401 switch (magic_etc & VFS_CAP_REVISION_MASK) {
402 case VFS_CAP_REVISION_1:
403 if (size != XATTR_CAPS_SZ_1)
404 return -EINVAL;
405 tocopy = VFS_CAP_U32_1;
406 break;
407 case VFS_CAP_REVISION_2:
408 if (size != XATTR_CAPS_SZ_2)
409 return -EINVAL;
410 tocopy = VFS_CAP_U32_2;
411 break;
412 default:
413 return -EINVAL;
414 }
415
416 CAP_FOR_EACH_U32(i) {
417 if (i >= tocopy)
418 break;
419 cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
420 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
421 }
422
423 return 0;
424}
425
426
427
428
429
430
431static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_cap)
432{
433 struct dentry *dentry;
434 int rc = 0;
435 struct cpu_vfs_cap_data vcaps;
436
437 bprm_clear_caps(bprm);
438
439 if (!file_caps_enabled)
440 return 0;
441
442 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
443 return 0;
444
445 dentry = dget(bprm->file->f_dentry);
446
447 rc = get_vfs_caps_from_disk(dentry, &vcaps);
448 if (rc < 0) {
449 if (rc == -EINVAL)
450 printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n",
451 __func__, rc, bprm->filename);
452 else if (rc == -ENODATA)
453 rc = 0;
454 goto out;
455 }
456
457 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_cap);
458 if (rc == -EINVAL)
459 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
460 __func__, rc, bprm->filename);
461
462out:
463 dput(dentry);
464 if (rc)
465 bprm_clear_caps(bprm);
466
467 return rc;
468}
469
470
471
472
473
474
475
476
477
478int cap_bprm_set_creds(struct linux_binprm *bprm)
479{
480 const struct cred *old = current_cred();
481 struct cred *new = bprm->cred;
482 bool effective, has_cap = false;
483 int ret;
484
485 effective = false;
486 ret = get_file_caps(bprm, &effective, &has_cap);
487 if (ret < 0)
488 return ret;
489
490 if (!issecure(SECURE_NOROOT)) {
491
492
493
494
495
496 if (has_cap && new->uid != 0 && new->euid == 0) {
497 warn_setuid_and_fcaps_mixed(bprm->filename);
498 goto skip;
499 }
500
501
502
503
504
505
506
507 if (new->euid == 0 || new->uid == 0) {
508
509 new->cap_permitted = cap_combine(old->cap_bset,
510 old->cap_inheritable);
511 }
512 if (new->euid == 0)
513 effective = true;
514 }
515skip:
516
517
518
519
520 if ((new->euid != old->uid ||
521 new->egid != old->gid ||
522 !cap_issubset(new->cap_permitted, old->cap_permitted)) &&
523 bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
524
525 if (!capable(CAP_SETUID)) {
526 new->euid = new->uid;
527 new->egid = new->gid;
528 }
529 new->cap_permitted = cap_intersect(new->cap_permitted,
530 old->cap_permitted);
531 }
532
533 new->suid = new->fsuid = new->euid;
534 new->sgid = new->fsgid = new->egid;
535
536 if (effective)
537 new->cap_effective = new->cap_permitted;
538 else
539 cap_clear(new->cap_effective);
540 bprm->cap_effective = effective;
541
542
543
544
545
546
547
548
549
550
551
552
553
554 if (!cap_isclear(new->cap_effective)) {
555 if (!cap_issubset(CAP_FULL_SET, new->cap_effective) ||
556 new->euid != 0 || new->uid != 0 ||
557 issecure(SECURE_NOROOT)) {
558 ret = audit_log_bprm_fcaps(bprm, new, old);
559 if (ret < 0)
560 return ret;
561 }
562 }
563
564 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
565 return 0;
566}
567
568
569
570
571
572
573
574
575
576
577
578int cap_bprm_secureexec(struct linux_binprm *bprm)
579{
580 const struct cred *cred = current_cred();
581
582 if (cred->uid != 0) {
583 if (bprm->cap_effective)
584 return 1;
585 if (!cap_isclear(cred->cap_permitted))
586 return 1;
587 }
588
589 return (cred->euid != cred->uid ||
590 cred->egid != cred->gid);
591}
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607int cap_inode_setxattr(struct dentry *dentry, const char *name,
608 const void *value, size_t size, int flags)
609{
610 if (!strcmp(name, XATTR_NAME_CAPS)) {
611 if (!capable(CAP_SETFCAP))
612 return -EPERM;
613 return 0;
614 }
615
616 if (!strncmp(name, XATTR_SECURITY_PREFIX,
617 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
618 !capable(CAP_SYS_ADMIN))
619 return -EPERM;
620 return 0;
621}
622
623
624
625
626
627
628
629
630
631
632
633
634int cap_inode_removexattr(struct dentry *dentry, const char *name)
635{
636 if (!strcmp(name, XATTR_NAME_CAPS)) {
637 if (!capable(CAP_SETFCAP))
638 return -EPERM;
639 return 0;
640 }
641
642 if (!strncmp(name, XATTR_SECURITY_PREFIX,
643 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
644 !capable(CAP_SYS_ADMIN))
645 return -EPERM;
646 return 0;
647}
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
679{
680 if ((old->uid == 0 || old->euid == 0 || old->suid == 0) &&
681 (new->uid != 0 && new->euid != 0 && new->suid != 0) &&
682 !issecure(SECURE_KEEP_CAPS)) {
683 cap_clear(new->cap_permitted);
684 cap_clear(new->cap_effective);
685 }
686 if (old->euid == 0 && new->euid != 0)
687 cap_clear(new->cap_effective);
688 if (old->euid != 0 && new->euid == 0)
689 new->cap_effective = new->cap_permitted;
690}
691
692
693
694
695
696
697
698
699
700
701int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
702{
703 switch (flags) {
704 case LSM_SETID_RE:
705 case LSM_SETID_ID:
706 case LSM_SETID_RES:
707
708
709 if (!issecure(SECURE_NO_SETUID_FIXUP))
710 cap_emulate_setxuid(new, old);
711 break;
712
713 case LSM_SETID_FS:
714
715
716
717
718
719
720 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
721 if (old->fsuid == 0 && new->fsuid != 0)
722 new->cap_effective =
723 cap_drop_fs_set(new->cap_effective);
724
725 if (old->fsuid != 0 && new->fsuid == 0)
726 new->cap_effective =
727 cap_raise_fs_set(new->cap_effective,
728 new->cap_permitted);
729 }
730 break;
731
732 default:
733 return -EINVAL;
734 }
735
736 return 0;
737}
738
739
740
741
742
743
744
745
746
747
748
749static int cap_safe_nice(struct task_struct *p)
750{
751 int is_subset;
752
753 rcu_read_lock();
754 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
755 current_cred()->cap_permitted);
756 rcu_read_unlock();
757
758 if (!is_subset && !capable(CAP_SYS_NICE))
759 return -EPERM;
760 return 0;
761}
762
763
764
765
766
767
768
769
/**
 * cap_task_setscheduler - Check whether a task's scheduling may be changed
 * @p: The task to affect
 *
 * Delegates to cap_safe_nice().  Returns 0 if allowed, -EPERM otherwise.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int rc = cap_safe_nice(p);

	return rc;
}
774
775
776
777
778
779
780
781
782
/**
 * cap_task_setioprio - Check whether a task's I/O priority may be changed
 * @p: The task to affect
 * @ioprio: The requested I/O priority (not examined)
 *
 * Delegates to cap_safe_nice().  Returns 0 if allowed, -EPERM otherwise.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int rc = cap_safe_nice(p);

	return rc;
}
787
788
789
790
791
792
793
794
795
/**
 * cap_task_setnice - Check whether a task's nice value may be changed
 * @p: The task to affect
 * @nice: The requested nice value (not examined)
 *
 * Delegates to cap_safe_nice().  Returns 0 if allowed, -EPERM otherwise.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int rc = cap_safe_nice(p);

	return rc;
}
800
801
802
803
804
805static long cap_prctl_drop(struct cred *new, unsigned long cap)
806{
807 if (!capable(CAP_SETPCAP))
808 return -EPERM;
809 if (!cap_valid(cap))
810 return -EINVAL;
811
812 cap_lower(new->cap_bset, cap);
813 return 0;
814}
815
816
817
818
819
820
821
822
823
824
825
826
827
828int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
829 unsigned long arg4, unsigned long arg5)
830{
831 struct cred *new;
832 long error = 0;
833
834 new = prepare_creds();
835 if (!new)
836 return -ENOMEM;
837
838 switch (option) {
839 case PR_CAPBSET_READ:
840 error = -EINVAL;
841 if (!cap_valid(arg2))
842 goto error;
843 error = !!cap_raised(new->cap_bset, arg2);
844 goto no_change;
845
846 case PR_CAPBSET_DROP:
847 error = cap_prctl_drop(new, arg2);
848 if (error < 0)
849 goto error;
850 goto changed;
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871 case PR_SET_SECUREBITS:
872 error = -EPERM;
873 if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
874 & (new->securebits ^ arg2))
875 || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))
876 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))
877 || (cap_capable(current, current_cred(),
878 current_cred()->user->user_ns, CAP_SETPCAP,
879 SECURITY_CAP_AUDIT) != 0)
880
881
882
883
884
885
886
887 )
888
889 goto error;
890 new->securebits = arg2;
891 goto changed;
892
893 case PR_GET_SECUREBITS:
894 error = new->securebits;
895 goto no_change;
896
897 case PR_GET_KEEPCAPS:
898 if (issecure(SECURE_KEEP_CAPS))
899 error = 1;
900 goto no_change;
901
902 case PR_SET_KEEPCAPS:
903 error = -EINVAL;
904 if (arg2 > 1)
905 goto error;
906 error = -EPERM;
907 if (issecure(SECURE_KEEP_CAPS_LOCKED))
908 goto error;
909 if (arg2)
910 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
911 else
912 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
913 goto changed;
914
915 default:
916
917 error = -ENOSYS;
918 goto error;
919 }
920
921
922changed:
923 return commit_creds(new);
924
925no_change:
926error:
927 abort_creds(new);
928 return error;
929}
930
931
932
933
934
935
936
937
938
939int cap_vm_enough_memory(struct mm_struct *mm, long pages)
940{
941 int cap_sys_admin = 0;
942
943 if (cap_capable(current, current_cred(), &init_user_ns, CAP_SYS_ADMIN,
944 SECURITY_CAP_NOAUDIT) == 0)
945 cap_sys_admin = 1;
946 return __vm_enough_memory(mm, pages, cap_sys_admin);
947}
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963int cap_file_mmap(struct file *file, unsigned long reqprot,
964 unsigned long prot, unsigned long flags,
965 unsigned long addr, unsigned long addr_only)
966{
967 int ret = 0;
968
969 if (addr < dac_mmap_min_addr) {
970 ret = cap_capable(current, current_cred(), &init_user_ns, CAP_SYS_RAWIO,
971 SECURITY_CAP_AUDIT);
972
973 if (ret == 0)
974 current->flags |= PF_SUPERPRIV;
975 }
976 return ret;
977}
978