linux/kernel/sys.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/sys.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/rcupdate.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>
  46
  47#ifndef SET_UNALIGN_CTL
  48# define SET_UNALIGN_CTL(a,b)   (-EINVAL)
  49#endif
  50#ifndef GET_UNALIGN_CTL
  51# define GET_UNALIGN_CTL(a,b)   (-EINVAL)
  52#endif
  53#ifndef SET_FPEMU_CTL
  54# define SET_FPEMU_CTL(a,b)     (-EINVAL)
  55#endif
  56#ifndef GET_FPEMU_CTL
  57# define GET_FPEMU_CTL(a,b)     (-EINVAL)
  58#endif
  59#ifndef SET_FPEXC_CTL
  60# define SET_FPEXC_CTL(a,b)     (-EINVAL)
  61#endif
  62#ifndef GET_FPEXC_CTL
  63# define GET_FPEXC_CTL(a,b)     (-EINVAL)
  64#endif
  65#ifndef GET_ENDIAN
  66# define GET_ENDIAN(a,b)        (-EINVAL)
  67#endif
  68#ifndef SET_ENDIAN
  69# define SET_ENDIAN(a,b)        (-EINVAL)
  70#endif
  71#ifndef GET_TSC_CTL
  72# define GET_TSC_CTL(a)         (-EINVAL)
  73#endif
  74#ifndef SET_TSC_CTL
  75# define SET_TSC_CTL(a)         (-EINVAL)
  76#endif
  77
  78/*
  79 * this is where the system-wide overflow UID and GID are defined, for
  80 * architectures that now have 32-bit UID/GID but didn't in the past
  81 */
  82
  83int overflowuid = DEFAULT_OVERFLOWUID;
  84int overflowgid = DEFAULT_OVERFLOWGID;
  85
  86#ifdef CONFIG_UID16
  87EXPORT_SYMBOL(overflowuid);
  88EXPORT_SYMBOL(overflowgid);
  89#endif
  90
  91/*
  92 * the same as above, but for filesystems which can only store a 16-bit
  93 * UID and GID. as such, this is needed on all architectures
  94 */
  95
  96int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  97int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  98
  99EXPORT_SYMBOL(fs_overflowuid);
 100EXPORT_SYMBOL(fs_overflowgid);
 101
 102/*
 103 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 104 */
 105
 106int C_A_D = 1;
 107struct pid *cad_pid;
 108EXPORT_SYMBOL(cad_pid);
 109
 110/*
 111 * If set, this is used for preparing the system to power off.
 112 */
 113
 114void (*pm_power_off_prepare)(void);
 115
 116/*
 117 * set the priority of a task
 118 * - the caller must hold the RCU read lock
 119 */
 120static int set_one_prio(struct task_struct *p, int niceval, int error)
 121{
 122        const struct cred *cred = current_cred(), *pcred = __task_cred(p);
 123        int no_nice;
 124
 125        if (pcred->uid  != cred->euid &&
 126            pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
 127                error = -EPERM;
 128                goto out;
 129        }
 130        if (niceval < task_nice(p) && !can_nice(p, niceval)) {
 131                error = -EACCES;
 132                goto out;
 133        }
 134        no_nice = security_task_setnice(p, niceval);
 135        if (no_nice) {
 136                error = no_nice;
 137                goto out;
 138        }
 139        if (error == -ESRCH)
 140                error = 0;
 141        set_user_nice(p, niceval);
 142out:
 143        return error;
 144}
 145
 146SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
 147{
 148        struct task_struct *g, *p;
 149        struct user_struct *user;
 150        const struct cred *cred = current_cred();
 151        int error = -EINVAL;
 152        struct pid *pgrp;
 153
 154        if (which > PRIO_USER || which < PRIO_PROCESS)
 155                goto out;
 156
 157        /* normalize: avoid signed division (rounding problems) */
 158        error = -ESRCH;
 159        if (niceval < -20)
 160                niceval = -20;
 161        if (niceval > 19)
 162                niceval = 19;
 163
 164        read_lock(&tasklist_lock);
 165        switch (which) {
 166                case PRIO_PROCESS:
 167                        if (who)
 168                                p = find_task_by_vpid(who);
 169                        else
 170                                p = current;
 171                        if (p)
 172                                error = set_one_prio(p, niceval, error);
 173                        break;
 174                case PRIO_PGRP:
 175                        if (who)
 176                                pgrp = find_vpid(who);
 177                        else
 178                                pgrp = task_pgrp(current);
 179                        do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
 180                                error = set_one_prio(p, niceval, error);
 181                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 182                        break;
 183                case PRIO_USER:
 184                        user = (struct user_struct *) cred->user;
 185                        if (!who)
 186                                who = cred->uid;
 187                        else if ((who != cred->uid) &&
 188                                 !(user = find_user(who)))
 189                                goto out_unlock;        /* No processes for this user */
 190
 191                        do_each_thread(g, p)
 192                                if (__task_cred(p)->uid == who)
 193                                        error = set_one_prio(p, niceval, error);
 194                        while_each_thread(g, p);
 195                        if (who != cred->uid)
 196                                free_uid(user);         /* For find_user() */
 197                        break;
 198        }
 199out_unlock:
 200        read_unlock(&tasklist_lock);
 201out:
 202        return error;
 203}
 204
 205/*
 206 * Ugh. To avoid negative return values, "getpriority()" will
 207 * not return the normal nice-value, but a negated value that
 208 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 209 * to stay compatible.
 210 */
 211SYSCALL_DEFINE2(getpriority, int, which, int, who)
 212{
 213        struct task_struct *g, *p;
 214        struct user_struct *user;
 215        const struct cred *cred = current_cred();
 216        long niceval, retval = -ESRCH;
 217        struct pid *pgrp;
 218
 219        if (which > PRIO_USER || which < PRIO_PROCESS)
 220                return -EINVAL;
 221
 222        read_lock(&tasklist_lock);
 223        switch (which) {
 224                case PRIO_PROCESS:
 225                        if (who)
 226                                p = find_task_by_vpid(who);
 227                        else
 228                                p = current;
 229                        if (p) {
 230                                niceval = 20 - task_nice(p);
 231                                if (niceval > retval)
 232                                        retval = niceval;
 233                        }
 234                        break;
 235                case PRIO_PGRP:
 236                        if (who)
 237                                pgrp = find_vpid(who);
 238                        else
 239                                pgrp = task_pgrp(current);
 240                        do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
 241                                niceval = 20 - task_nice(p);
 242                                if (niceval > retval)
 243                                        retval = niceval;
 244                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 245                        break;
 246                case PRIO_USER:
 247                        user = (struct user_struct *) cred->user;
 248                        if (!who)
 249                                who = cred->uid;
 250                        else if ((who != cred->uid) &&
 251                                 !(user = find_user(who)))
 252                                goto out_unlock;        /* No processes for this user */
 253
 254                        do_each_thread(g, p)
 255                                if (__task_cred(p)->uid == who) {
 256                                        niceval = 20 - task_nice(p);
 257                                        if (niceval > retval)
 258                                                retval = niceval;
 259                                }
 260                        while_each_thread(g, p);
 261                        if (who != cred->uid)
 262                                free_uid(user);         /* for find_user() */
 263                        break;
 264        }
 265out_unlock:
 266        read_unlock(&tasklist_lock);
 267
 268        return retval;
 269}
 270
 271/**
 272 *      emergency_restart - reboot the system
 273 *
 274 *      Without shutting down any hardware or taking any locks
 275 *      reboot the system.  This is called when we know we are in
 276 *      trouble so this is our best effort to reboot.  This is
 277 *      safe to call in interrupt context.
 278 */
 279void emergency_restart(void)
 280{
 281        machine_emergency_restart();
 282}
 283EXPORT_SYMBOL_GPL(emergency_restart);
 284
 285void kernel_restart_prepare(char *cmd)
 286{
 287        blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
 288        system_state = SYSTEM_RESTART;
 289        device_shutdown();
 290        sysdev_shutdown();
 291}
 292
 293/**
 294 *      kernel_restart - reboot the system
 295 *      @cmd: pointer to buffer containing command to execute for restart
 296 *              or %NULL
 297 *
 298 *      Shutdown everything and perform a clean reboot.
 299 *      This is not safe to call in interrupt context.
 300 */
 301void kernel_restart(char *cmd)
 302{
 303        kernel_restart_prepare(cmd);
 304        if (!cmd)
 305                printk(KERN_EMERG "Restarting system.\n");
 306        else
 307                printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
 308        machine_restart(cmd);
 309}
 310EXPORT_SYMBOL_GPL(kernel_restart);
 311
 312static void kernel_shutdown_prepare(enum system_states state)
 313{
 314        blocking_notifier_call_chain(&reboot_notifier_list,
 315                (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
 316        system_state = state;
 317        device_shutdown();
 318}
 319/**
 320 *      kernel_halt - halt the system
 321 *
 322 *      Shutdown everything and perform a clean system halt.
 323 */
 324void kernel_halt(void)
 325{
 326        kernel_shutdown_prepare(SYSTEM_HALT);
 327        sysdev_shutdown();
 328        printk(KERN_EMERG "System halted.\n");
 329        machine_halt();
 330}
 331
 332EXPORT_SYMBOL_GPL(kernel_halt);
 333
 334/**
 335 *      kernel_power_off - power_off the system
 336 *
 337 *      Shutdown everything and perform a clean system power_off.
 338 */
 339void kernel_power_off(void)
 340{
 341        kernel_shutdown_prepare(SYSTEM_POWER_OFF);
 342        if (pm_power_off_prepare)
 343                pm_power_off_prepare();
 344        disable_nonboot_cpus();
 345        sysdev_shutdown();
 346        printk(KERN_EMERG "Power down.\n");
 347        machine_power_off();
 348}
 349EXPORT_SYMBOL_GPL(kernel_power_off);
 350/*
 351 * Reboot system call: for obvious reasons only root may call it,
 352 * and even root needs to set up some magic numbers in the registers
 353 * so that some mistake won't make this reboot the whole machine.
 354 * You can also set the meaning of the ctrl-alt-del-key here.
 355 *
 356 * reboot doesn't sync: do that yourself before calling this.
 357 */
 358SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 359                void __user *, arg)
 360{
 361        char buffer[256];
 362
 363        /* We only trust the superuser with rebooting the system. */
 364        if (!capable(CAP_SYS_BOOT))
 365                return -EPERM;
 366
 367        /* For safety, we require "magic" arguments. */
 368        if (magic1 != LINUX_REBOOT_MAGIC1 ||
 369            (magic2 != LINUX_REBOOT_MAGIC2 &&
 370                        magic2 != LINUX_REBOOT_MAGIC2A &&
 371                        magic2 != LINUX_REBOOT_MAGIC2B &&
 372                        magic2 != LINUX_REBOOT_MAGIC2C))
 373                return -EINVAL;
 374
 375        /* Instead of trying to make the power_off code look like
 376         * halt when pm_power_off is not set do it the easy way.
 377         */
 378        if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
 379                cmd = LINUX_REBOOT_CMD_HALT;
 380
 381        lock_kernel();
 382        switch (cmd) {
 383        case LINUX_REBOOT_CMD_RESTART:
 384                kernel_restart(NULL);
 385                break;
 386
 387        case LINUX_REBOOT_CMD_CAD_ON:
 388                C_A_D = 1;
 389                break;
 390
 391        case LINUX_REBOOT_CMD_CAD_OFF:
 392                C_A_D = 0;
 393                break;
 394
 395        case LINUX_REBOOT_CMD_HALT:
 396                kernel_halt();
 397                unlock_kernel();
 398                do_exit(0);
 399                break;
 400
 401        case LINUX_REBOOT_CMD_POWER_OFF:
 402                kernel_power_off();
 403                unlock_kernel();
 404                do_exit(0);
 405                break;
 406
 407        case LINUX_REBOOT_CMD_RESTART2:
 408                if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
 409                        unlock_kernel();
 410                        return -EFAULT;
 411                }
 412                buffer[sizeof(buffer) - 1] = '\0';
 413
 414                kernel_restart(buffer);
 415                break;
 416
 417#ifdef CONFIG_KEXEC
 418        case LINUX_REBOOT_CMD_KEXEC:
 419                {
 420                        int ret;
 421                        ret = kernel_kexec();
 422                        unlock_kernel();
 423                        return ret;
 424                }
 425#endif
 426
 427#ifdef CONFIG_HIBERNATION
 428        case LINUX_REBOOT_CMD_SW_SUSPEND:
 429                {
 430                        int ret = hibernate();
 431                        unlock_kernel();
 432                        return ret;
 433                }
 434#endif
 435
 436        default:
 437                unlock_kernel();
 438                return -EINVAL;
 439        }
 440        unlock_kernel();
 441        return 0;
 442}
 443
 444static void deferred_cad(struct work_struct *dummy)
 445{
 446        kernel_restart(NULL);
 447}
 448
 449/*
 450 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 451 * As it's called within an interrupt, it may NOT sync: the only choice
 452 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 453 */
 454void ctrl_alt_del(void)
 455{
 456        static DECLARE_WORK(cad_work, deferred_cad);
 457
 458        if (C_A_D)
 459                schedule_work(&cad_work);
 460        else
 461                kill_cad_pid(SIGINT, 1);
 462}
 463        
 464/*
 465 * Unprivileged users may change the real gid to the effective gid
 466 * or vice versa.  (BSD-style)
 467 *
 468 * If you set the real gid at all, or set the effective gid to a value not
 469 * equal to the real gid, then the saved gid is set to the new effective gid.
 470 *
 471 * This makes it possible for a setgid program to completely drop its
 472 * privileges, which is often a useful assertion to make when you are doing
 473 * a security audit over a program.
 474 *
 475 * The general idea is that a program which uses just setregid() will be
 476 * 100% compatible with BSD.  A program which uses just setgid() will be
 477 * 100% compatible with POSIX with saved IDs. 
 478 *
 479 * SMP: There are not races, the GIDs are checked only by filesystem
 480 *      operations (as far as semantic preservation is concerned).
 481 */
 482SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
 483{
 484        const struct cred *old;
 485        struct cred *new;
 486        int retval;
 487
 488        new = prepare_creds();
 489        if (!new)
 490                return -ENOMEM;
 491        old = current_cred();
 492
 493        retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
 494        if (retval)
 495                goto error;
 496
 497        retval = -EPERM;
 498        if (rgid != (gid_t) -1) {
 499                if (old->gid == rgid ||
 500                    old->egid == rgid ||
 501                    capable(CAP_SETGID))
 502                        new->gid = rgid;
 503                else
 504                        goto error;
 505        }
 506        if (egid != (gid_t) -1) {
 507                if (old->gid == egid ||
 508                    old->egid == egid ||
 509                    old->sgid == egid ||
 510                    capable(CAP_SETGID))
 511                        new->egid = egid;
 512                else
 513                        goto error;
 514        }
 515
 516        if (rgid != (gid_t) -1 ||
 517            (egid != (gid_t) -1 && egid != old->gid))
 518                new->sgid = new->egid;
 519        new->fsgid = new->egid;
 520
 521        return commit_creds(new);
 522
 523error:
 524        abort_creds(new);
 525        return retval;
 526}
 527
 528/*
 529 * setgid() is implemented like SysV w/ SAVED_IDS 
 530 *
 531 * SMP: Same implicit races as above.
 532 */
 533SYSCALL_DEFINE1(setgid, gid_t, gid)
 534{
 535        const struct cred *old;
 536        struct cred *new;
 537        int retval;
 538
 539        new = prepare_creds();
 540        if (!new)
 541                return -ENOMEM;
 542        old = current_cred();
 543
 544        retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
 545        if (retval)
 546                goto error;
 547
 548        retval = -EPERM;
 549        if (capable(CAP_SETGID))
 550                new->gid = new->egid = new->sgid = new->fsgid = gid;
 551        else if (gid == old->gid || gid == old->sgid)
 552                new->egid = new->fsgid = gid;
 553        else
 554                goto error;
 555
 556        return commit_creds(new);
 557
 558error:
 559        abort_creds(new);
 560        return retval;
 561}
 562
 563/*
 564 * change the user struct in a credentials set to match the new UID
 565 */
 566static int set_user(struct cred *new)
 567{
 568        struct user_struct *new_user;
 569
 570        new_user = alloc_uid(current_user_ns(), new->uid);
 571        if (!new_user)
 572                return -EAGAIN;
 573
 574        if (!task_can_switch_user(new_user, current)) {
 575                free_uid(new_user);
 576                return -EINVAL;
 577        }
 578
 579        if (atomic_read(&new_user->processes) >=
 580                                current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
 581                        new_user != INIT_USER) {
 582                free_uid(new_user);
 583                return -EAGAIN;
 584        }
 585
 586        free_uid(new->user);
 587        new->user = new_user;
 588        return 0;
 589}
 590
 591/*
 592 * Unprivileged users may change the real uid to the effective uid
 593 * or vice versa.  (BSD-style)
 594 *
 595 * If you set the real uid at all, or set the effective uid to a value not
 596 * equal to the real uid, then the saved uid is set to the new effective uid.
 597 *
 598 * This makes it possible for a setuid program to completely drop its
 599 * privileges, which is often a useful assertion to make when you are doing
 600 * a security audit over a program.
 601 *
 602 * The general idea is that a program which uses just setreuid() will be
 603 * 100% compatible with BSD.  A program which uses just setuid() will be
 604 * 100% compatible with POSIX with saved IDs. 
 605 */
 606SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 607{
 608        const struct cred *old;
 609        struct cred *new;
 610        int retval;
 611
 612        new = prepare_creds();
 613        if (!new)
 614                return -ENOMEM;
 615        old = current_cred();
 616
 617        retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
 618        if (retval)
 619                goto error;
 620
 621        retval = -EPERM;
 622        if (ruid != (uid_t) -1) {
 623                new->uid = ruid;
 624                if (old->uid != ruid &&
 625                    old->euid != ruid &&
 626                    !capable(CAP_SETUID))
 627                        goto error;
 628        }
 629
 630        if (euid != (uid_t) -1) {
 631                new->euid = euid;
 632                if (old->uid != euid &&
 633                    old->euid != euid &&
 634                    old->suid != euid &&
 635                    !capable(CAP_SETUID))
 636                        goto error;
 637        }
 638
 639        if (new->uid != old->uid) {
 640                retval = set_user(new);
 641                if (retval < 0)
 642                        goto error;
 643        }
 644        if (ruid != (uid_t) -1 ||
 645            (euid != (uid_t) -1 && euid != old->uid))
 646                new->suid = new->euid;
 647        new->fsuid = new->euid;
 648
 649        retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
 650        if (retval < 0)
 651                goto error;
 652
 653        return commit_creds(new);
 654
 655error:
 656        abort_creds(new);
 657        return retval;
 658}
 659                
 660/*
 661 * setuid() is implemented like SysV with SAVED_IDS 
 662 * 
 663 * Note that SAVED_ID's is deficient in that a setuid root program
 664 * like sendmail, for example, cannot set its uid to be a normal 
 665 * user and then switch back, because if you're root, setuid() sets
 666 * the saved uid too.  If you don't like this, blame the bright people
 667 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 668 * will allow a root program to temporarily drop privileges and be able to
 669 * regain them by swapping the real and effective uid.  
 670 */
 671SYSCALL_DEFINE1(setuid, uid_t, uid)
 672{
 673        const struct cred *old;
 674        struct cred *new;
 675        int retval;
 676
 677        new = prepare_creds();
 678        if (!new)
 679                return -ENOMEM;
 680        old = current_cred();
 681
 682        retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
 683        if (retval)
 684                goto error;
 685
 686        retval = -EPERM;
 687        if (capable(CAP_SETUID)) {
 688                new->suid = new->uid = uid;
 689                if (uid != old->uid) {
 690                        retval = set_user(new);
 691                        if (retval < 0)
 692                                goto error;
 693                }
 694        } else if (uid != old->uid && uid != new->suid) {
 695                goto error;
 696        }
 697
 698        new->fsuid = new->euid = uid;
 699
 700        retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
 701        if (retval < 0)
 702                goto error;
 703
 704        return commit_creds(new);
 705
 706error:
 707        abort_creds(new);
 708        return retval;
 709}
 710
 711
 712/*
 713 * This function implements a generic ability to update ruid, euid,
 714 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 715 */
 716SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 717{
 718        const struct cred *old;
 719        struct cred *new;
 720        int retval;
 721
 722        new = prepare_creds();
 723        if (!new)
 724                return -ENOMEM;
 725
 726        retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
 727        if (retval)
 728                goto error;
 729        old = current_cred();
 730
 731        retval = -EPERM;
 732        if (!capable(CAP_SETUID)) {
 733                if (ruid != (uid_t) -1 && ruid != old->uid &&
 734                    ruid != old->euid  && ruid != old->suid)
 735                        goto error;
 736                if (euid != (uid_t) -1 && euid != old->uid &&
 737                    euid != old->euid  && euid != old->suid)
 738                        goto error;
 739                if (suid != (uid_t) -1 && suid != old->uid &&
 740                    suid != old->euid  && suid != old->suid)
 741                        goto error;
 742        }
 743
 744        if (ruid != (uid_t) -1) {
 745                new->uid = ruid;
 746                if (ruid != old->uid) {
 747                        retval = set_user(new);
 748                        if (retval < 0)
 749                                goto error;
 750                }
 751        }
 752        if (euid != (uid_t) -1)
 753                new->euid = euid;
 754        if (suid != (uid_t) -1)
 755                new->suid = suid;
 756        new->fsuid = new->euid;
 757
 758        retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
 759        if (retval < 0)
 760                goto error;
 761
 762        return commit_creds(new);
 763
 764error:
 765        abort_creds(new);
 766        return retval;
 767}
 768
 769SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
 770{
 771        const struct cred *cred = current_cred();
 772        int retval;
 773
 774        if (!(retval   = put_user(cred->uid,  ruid)) &&
 775            !(retval   = put_user(cred->euid, euid)))
 776                retval = put_user(cred->suid, suid);
 777
 778        return retval;
 779}
 780
 781/*
 782 * Same as above, but for rgid, egid, sgid.
 783 */
 784SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
 785{
 786        const struct cred *old;
 787        struct cred *new;
 788        int retval;
 789
 790        new = prepare_creds();
 791        if (!new)
 792                return -ENOMEM;
 793        old = current_cred();
 794
 795        retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
 796        if (retval)
 797                goto error;
 798
 799        retval = -EPERM;
 800        if (!capable(CAP_SETGID)) {
 801                if (rgid != (gid_t) -1 && rgid != old->gid &&
 802                    rgid != old->egid  && rgid != old->sgid)
 803                        goto error;
 804                if (egid != (gid_t) -1 && egid != old->gid &&
 805                    egid != old->egid  && egid != old->sgid)
 806                        goto error;
 807                if (sgid != (gid_t) -1 && sgid != old->gid &&
 808                    sgid != old->egid  && sgid != old->sgid)
 809                        goto error;
 810        }
 811
 812        if (rgid != (gid_t) -1)
 813                new->gid = rgid;
 814        if (egid != (gid_t) -1)
 815                new->egid = egid;
 816        if (sgid != (gid_t) -1)
 817                new->sgid = sgid;
 818        new->fsgid = new->egid;
 819
 820        return commit_creds(new);
 821
 822error:
 823        abort_creds(new);
 824        return retval;
 825}
 826
 827SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
 828{
 829        const struct cred *cred = current_cred();
 830        int retval;
 831
 832        if (!(retval   = put_user(cred->gid,  rgid)) &&
 833            !(retval   = put_user(cred->egid, egid)))
 834                retval = put_user(cred->sgid, sgid);
 835
 836        return retval;
 837}
 838
 839
 840/*
 841 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 842 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 843 * whatever uid it wants to). It normally shadows "euid", except when
 844 * explicitly set by setfsuid() or for access..
 845 */
 846SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 847{
 848        const struct cred *old;
 849        struct cred *new;
 850        uid_t old_fsuid;
 851
 852        new = prepare_creds();
 853        if (!new)
 854                return current_fsuid();
 855        old = current_cred();
 856        old_fsuid = old->fsuid;
 857
 858        if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
 859                goto error;
 860
 861        if (uid == old->uid  || uid == old->euid  ||
 862            uid == old->suid || uid == old->fsuid ||
 863            capable(CAP_SETUID)) {
 864                if (uid != old_fsuid) {
 865                        new->fsuid = uid;
 866                        if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
 867                                goto change_okay;
 868                }
 869        }
 870
 871error:
 872        abort_creds(new);
 873        return old_fsuid;
 874
 875change_okay:
 876        commit_creds(new);
 877        return old_fsuid;
 878}
 879
 880/*
 881 * Samma på svenska..
 882 */
 883SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 884{
 885        const struct cred *old;
 886        struct cred *new;
 887        gid_t old_fsgid;
 888
 889        new = prepare_creds();
 890        if (!new)
 891                return current_fsgid();
 892        old = current_cred();
 893        old_fsgid = old->fsgid;
 894
 895        if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
 896                goto error;
 897
 898        if (gid == old->gid  || gid == old->egid  ||
 899            gid == old->sgid || gid == old->fsgid ||
 900            capable(CAP_SETGID)) {
 901                if (gid != old_fsgid) {
 902                        new->fsgid = gid;
 903                        goto change_okay;
 904                }
 905        }
 906
 907error:
 908        abort_creds(new);
 909        return old_fsgid;
 910
 911change_okay:
 912        commit_creds(new);
 913        return old_fsgid;
 914}
 915
 916void do_sys_times(struct tms *tms)
 917{
 918        struct task_cputime cputime;
 919        cputime_t cutime, cstime;
 920
 921        thread_group_cputime(current, &cputime);
 922        spin_lock_irq(&current->sighand->siglock);
 923        cutime = current->signal->cutime;
 924        cstime = current->signal->cstime;
 925        spin_unlock_irq(&current->sighand->siglock);
 926        tms->tms_utime = cputime_to_clock_t(cputime.utime);
 927        tms->tms_stime = cputime_to_clock_t(cputime.stime);
 928        tms->tms_cutime = cputime_to_clock_t(cutime);
 929        tms->tms_cstime = cputime_to_clock_t(cstime);
 930}
 931
 932SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
 933{
 934        if (tbuf) {
 935                struct tms tmp;
 936
 937                do_sys_times(&tmp);
 938                if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 939                        return -EFAULT;
 940        }
 941        force_successful_syscall_return();
 942        return (long) jiffies_64_to_clock_t(get_jiffies_64());
 943}
 944
 945/*
 946 * This needs some heavy checking ...
 947 * I just haven't the stomach for it. I also don't fully
 948 * understand sessions/pgrp etc. Let somebody who does explain it.
 949 *
 950 * OK, I think I have the protection semantics right.... this is really
 951 * only important on a multi-user system anyway, to make sure one user
 952 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 953 *
 954 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 955 * LBT 04.03.94
 956 */
 957SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
 958{
 959        struct task_struct *p;
 960        struct task_struct *group_leader = current->group_leader;
 961        struct pid *pgrp;
 962        int err;
 963
 964        if (!pid)
 965                pid = task_pid_vnr(group_leader);
 966        if (!pgid)
 967                pgid = pid;
 968        if (pgid < 0)
 969                return -EINVAL;
 970
 971        /* From this point forward we keep holding onto the tasklist lock
 972         * so that our parent does not change from under us. -DaveM
 973         */
 974        write_lock_irq(&tasklist_lock);
 975
 976        err = -ESRCH;
 977        p = find_task_by_vpid(pid);
 978        if (!p)
 979                goto out;
 980
 981        err = -EINVAL;
 982        if (!thread_group_leader(p))
 983                goto out;
 984
 985        if (same_thread_group(p->real_parent, group_leader)) {
 986                err = -EPERM;
 987                if (task_session(p) != task_session(group_leader))
 988                        goto out;
 989                err = -EACCES;
 990                if (p->did_exec)
 991                        goto out;
 992        } else {
 993                err = -ESRCH;
 994                if (p != group_leader)
 995                        goto out;
 996        }
 997
 998        err = -EPERM;
 999        if (p->signal->leader)
1000                goto out;
1001
1002        pgrp = task_pid(p);
1003        if (pgid != pid) {
1004                struct task_struct *g;
1005
1006                pgrp = find_vpid(pgid);
1007                g = pid_task(pgrp, PIDTYPE_PGID);
1008                if (!g || task_session(g) != task_session(group_leader))
1009                        goto out;
1010        }
1011
1012        err = security_task_setpgid(p, pgid);
1013        if (err)
1014                goto out;
1015
1016        if (task_pgrp(p) != pgrp) {
1017                change_pid(p, PIDTYPE_PGID, pgrp);
1018                set_task_pgrp(p, pid_nr(pgrp));
1019        }
1020
1021        err = 0;
1022out:
1023        /* All paths lead to here, thus we are safe. -DaveM */
1024        write_unlock_irq(&tasklist_lock);
1025        return err;
1026}
1027
1028SYSCALL_DEFINE1(getpgid, pid_t, pid)
1029{
1030        struct task_struct *p;
1031        struct pid *grp;
1032        int retval;
1033
1034        rcu_read_lock();
1035        if (!pid)
1036                grp = task_pgrp(current);
1037        else {
1038                retval = -ESRCH;
1039                p = find_task_by_vpid(pid);
1040                if (!p)
1041                        goto out;
1042                grp = task_pgrp(p);
1043                if (!grp)
1044                        goto out;
1045
1046                retval = security_task_getpgid(p);
1047                if (retval)
1048                        goto out;
1049        }
1050        retval = pid_vnr(grp);
1051out:
1052        rcu_read_unlock();
1053        return retval;
1054}
1055
#ifdef __ARCH_WANT_SYS_GETPGRP

/*
 * getpgrp() - only built when __ARCH_WANT_SYS_GETPGRP is defined.
 * Implemented as getpgid(0), i.e. it reports the caller's own process
 * group id.
 */
SYSCALL_DEFINE0(getpgrp)
{
        return sys_getpgid(0);
}

#endif
1064
1065SYSCALL_DEFINE1(getsid, pid_t, pid)
1066{
1067        struct task_struct *p;
1068        struct pid *sid;
1069        int retval;
1070
1071        rcu_read_lock();
1072        if (!pid)
1073                sid = task_session(current);
1074        else {
1075                retval = -ESRCH;
1076                p = find_task_by_vpid(pid);
1077                if (!p)
1078                        goto out;
1079                sid = task_session(p);
1080                if (!sid)
1081                        goto out;
1082
1083                retval = security_task_getsid(p);
1084                if (retval)
1085                        goto out;
1086        }
1087        retval = pid_vnr(sid);
1088out:
1089        rcu_read_unlock();
1090        return retval;
1091}
1092
1093SYSCALL_DEFINE0(setsid)
1094{
1095        struct task_struct *group_leader = current->group_leader;
1096        struct pid *sid = task_pid(group_leader);
1097        pid_t session = pid_vnr(sid);
1098        int err = -EPERM;
1099
1100        write_lock_irq(&tasklist_lock);
1101        /* Fail if I am already a session leader */
1102        if (group_leader->signal->leader)
1103                goto out;
1104
1105        /* Fail if a process group id already exists that equals the
1106         * proposed session id.
1107         */
1108        if (pid_task(sid, PIDTYPE_PGID))
1109                goto out;
1110
1111        group_leader->signal->leader = 1;
1112        __set_special_pids(sid);
1113
1114        proc_clear_tty(group_leader);
1115
1116        err = session;
1117out:
1118        write_unlock_irq(&tasklist_lock);
1119        return err;
1120}
1121
1122/*
1123 * Supplementary group IDs
1124 */
1125
/*
 * Statically allocated group_info.  Its usage count is initialised to 2:
 * one reference for init_task, and one extra to ensure it is never freed.
 */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
1128
1129struct group_info *groups_alloc(int gidsetsize)
1130{
1131        struct group_info *group_info;
1132        int nblocks;
1133        int i;
1134
1135        nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
1136        /* Make sure we always allocate at least one indirect block pointer */
1137        nblocks = nblocks ? : 1;
1138        group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
1139        if (!group_info)
1140                return NULL;
1141        group_info->ngroups = gidsetsize;
1142        group_info->nblocks = nblocks;
1143        atomic_set(&group_info->usage, 1);
1144
1145        if (gidsetsize <= NGROUPS_SMALL)
1146                group_info->blocks[0] = group_info->small_block;
1147        else {
1148                for (i = 0; i < nblocks; i++) {
1149                        gid_t *b;
1150                        b = (void *)__get_free_page(GFP_USER);
1151                        if (!b)
1152                                goto out_undo_partial_alloc;
1153                        group_info->blocks[i] = b;
1154                }
1155        }
1156        return group_info;
1157
1158out_undo_partial_alloc:
1159        while (--i >= 0) {
1160                free_page((unsigned long)group_info->blocks[i]);
1161        }
1162        kfree(group_info);
1163        return NULL;
1164}
1165
1166EXPORT_SYMBOL(groups_alloc);
1167
1168void groups_free(struct group_info *group_info)
1169{
1170        if (group_info->blocks[0] != group_info->small_block) {
1171                int i;
1172                for (i = 0; i < group_info->nblocks; i++)
1173                        free_page((unsigned long)group_info->blocks[i]);
1174        }
1175        kfree(group_info);
1176}
1177
1178EXPORT_SYMBOL(groups_free);
1179
1180/* export the group_info to a user-space array */
1181static int groups_to_user(gid_t __user *grouplist,
1182                          const struct group_info *group_info)
1183{
1184        int i;
1185        unsigned int count = group_info->ngroups;
1186
1187        for (i = 0; i < group_info->nblocks; i++) {
1188                unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
1189                unsigned int len = cp_count * sizeof(*grouplist);
1190
1191                if (copy_to_user(grouplist, group_info->blocks[i], len))
1192                        return -EFAULT;
1193
1194                grouplist += NGROUPS_PER_BLOCK;
1195                count -= cp_count;
1196        }
1197        return 0;
1198}
1199
1200/* fill a group_info from a user-space array - it must be allocated already */
1201static int groups_from_user(struct group_info *group_info,
1202    gid_t __user *grouplist)
1203{
1204        int i;
1205        unsigned int count = group_info->ngroups;
1206
1207        for (i = 0; i < group_info->nblocks; i++) {
1208                unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
1209                unsigned int len = cp_count * sizeof(*grouplist);
1210
1211                if (copy_from_user(group_info->blocks[i], grouplist, len))
1212                        return -EFAULT;
1213
1214                grouplist += NGROUPS_PER_BLOCK;
1215                count -= cp_count;
1216        }
1217        return 0;
1218}
1219
1220/* a simple Shell sort */
1221static void groups_sort(struct group_info *group_info)
1222{
1223        int base, max, stride;
1224        int gidsetsize = group_info->ngroups;
1225
1226        for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
1227                ; /* nothing */
1228        stride /= 3;
1229
1230        while (stride) {
1231                max = gidsetsize - stride;
1232                for (base = 0; base < max; base++) {
1233                        int left = base;
1234                        int right = left + stride;
1235                        gid_t tmp = GROUP_AT(group_info, right);
1236
1237                        while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
1238                                GROUP_AT(group_info, right) =
1239                                    GROUP_AT(group_info, left);
1240                                right = left;
1241                                left -= stride;
1242                        }
1243                        GROUP_AT(group_info, right) = tmp;
1244                }
1245                stride /= 3;
1246        }
1247}
1248
1249/* a simple bsearch */
1250int groups_search(const struct group_info *group_info, gid_t grp)
1251{
1252        unsigned int left, right;
1253
1254        if (!group_info)
1255                return 0;
1256
1257        left = 0;
1258        right = group_info->ngroups;
1259        while (left < right) {
1260                unsigned int mid = (left+right)/2;
1261                int cmp = grp - GROUP_AT(group_info, mid);
1262                if (cmp > 0)
1263                        left = mid + 1;
1264                else if (cmp < 0)
1265                        right = mid;
1266                else
1267                        return 1;
1268        }
1269        return 0;
1270}
1271
1272/**
1273 * set_groups - Change a group subscription in a set of credentials
1274 * @new: The newly prepared set of credentials to alter
1275 * @group_info: The group list to install
1276 *
1277 * Validate a group subscription and, if valid, insert it into a set
1278 * of credentials.
1279 */
1280int set_groups(struct cred *new, struct group_info *group_info)
1281{
1282        int retval;
1283
1284        retval = security_task_setgroups(group_info);
1285        if (retval)
1286                return retval;
1287
1288        put_group_info(new->group_info);
1289        groups_sort(group_info);
1290        get_group_info(group_info);
1291        new->group_info = group_info;
1292        return 0;
1293}
1294
1295EXPORT_SYMBOL(set_groups);
1296
1297/**
1298 * set_current_groups - Change current's group subscription
1299 * @group_info: The group list to impose
1300 *
1301 * Validate a group subscription and, if valid, impose it upon current's task
1302 * security record.
1303 */
1304int set_current_groups(struct group_info *group_info)
1305{
1306        struct cred *new;
1307        int ret;
1308
1309        new = prepare_creds();
1310        if (!new)
1311                return -ENOMEM;
1312
1313        ret = set_groups(new, group_info);
1314        if (ret < 0) {
1315                abort_creds(new);
1316                return ret;
1317        }
1318
1319        return commit_creds(new);
1320}
1321
1322EXPORT_SYMBOL(set_current_groups);
1323
1324SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
1325{
1326        const struct cred *cred = current_cred();
1327        int i;
1328
1329        if (gidsetsize < 0)
1330                return -EINVAL;
1331
1332        /* no need to grab task_lock here; it cannot change */
1333        i = cred->group_info->ngroups;
1334        if (gidsetsize) {
1335                if (i > gidsetsize) {
1336                        i = -EINVAL;
1337                        goto out;
1338                }
1339                if (groups_to_user(grouplist, cred->group_info)) {
1340                        i = -EFAULT;
1341                        goto out;
1342                }
1343        }
1344out:
1345        return i;
1346}
1347
1348/*
1349 *      SMP: Our groups are copy-on-write. We can set them safely
1350 *      without another task interfering.
1351 */
1352 
1353SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
1354{
1355        struct group_info *group_info;
1356        int retval;
1357
1358        if (!capable(CAP_SETGID))
1359                return -EPERM;
1360        if ((unsigned)gidsetsize > NGROUPS_MAX)
1361                return -EINVAL;
1362
1363        group_info = groups_alloc(gidsetsize);
1364        if (!group_info)
1365                return -ENOMEM;
1366        retval = groups_from_user(group_info, grouplist);
1367        if (retval) {
1368                put_group_info(group_info);
1369                return retval;
1370        }
1371
1372        retval = set_current_groups(group_info);
1373        put_group_info(group_info);
1374
1375        return retval;
1376}
1377
1378/*
1379 * Check whether we're fsgid/egid or in the supplemental group..
1380 */
1381int in_group_p(gid_t grp)
1382{
1383        const struct cred *cred = current_cred();
1384        int retval = 1;
1385
1386        if (grp != cred->fsgid)
1387                retval = groups_search(cred->group_info, grp);
1388        return retval;
1389}
1390
1391EXPORT_SYMBOL(in_group_p);
1392
1393int in_egroup_p(gid_t grp)
1394{
1395        const struct cred *cred = current_cred();
1396        int retval = 1;
1397
1398        if (grp != cred->egid)
1399                retval = groups_search(cred->group_info, grp);
1400        return retval;
1401}
1402
1403EXPORT_SYMBOL(in_egroup_p);
1404
/*
 * Read/write semaphore around the utsname() data: the syscalls below that
 * read it (newuname, gethostname) take it for reading, the ones that
 * modify it (sethostname, setdomainname) take it for writing.
 */
DECLARE_RWSEM(uts_sem);
1406
1407SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1408{
1409        int errno = 0;
1410
1411        down_read(&uts_sem);
1412        if (copy_to_user(name, utsname(), sizeof *name))
1413                errno = -EFAULT;
1414        up_read(&uts_sem);
1415        return errno;
1416}
1417
1418SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1419{
1420        int errno;
1421        char tmp[__NEW_UTS_LEN];
1422
1423        if (!capable(CAP_SYS_ADMIN))
1424                return -EPERM;
1425        if (len < 0 || len > __NEW_UTS_LEN)
1426                return -EINVAL;
1427        down_write(&uts_sem);
1428        errno = -EFAULT;
1429        if (!copy_from_user(tmp, name, len)) {
1430                struct new_utsname *u = utsname();
1431
1432                memcpy(u->nodename, tmp, len);
1433                memset(u->nodename + len, 0, sizeof(u->nodename) - len);
1434                errno = 0;
1435        }
1436        up_write(&uts_sem);
1437        return errno;
1438}
1439
#ifdef __ARCH_WANT_SYS_GETHOSTNAME

/*
 * gethostname() - copy the node name to @name.
 *
 * At most @len bytes are copied: the name plus its terminating NUL when
 * that fits, otherwise only the first @len bytes (no NUL).
 *
 * Errors: -EINVAL if @len is negative, -EFAULT if the copy fails.
 */
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
        struct new_utsname *u;
        int nbytes, ret;

        if (len < 0)
                return -EINVAL;

        down_read(&uts_sem);
        u = utsname();

        /* Include the trailing NUL, then clamp to the caller's buffer. */
        nbytes = 1 + strlen(u->nodename);
        if (nbytes > len)
                nbytes = len;

        ret = copy_to_user(name, u->nodename, nbytes) ? -EFAULT : 0;
        up_read(&uts_sem);

        return ret;
}

#endif
1462
1463/*
1464 * Only setdomainname; getdomainname can be implemented by calling
1465 * uname()
1466 */
1467SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1468{
1469        int errno;
1470        char tmp[__NEW_UTS_LEN];
1471
1472        if (!capable(CAP_SYS_ADMIN))
1473                return -EPERM;
1474        if (len < 0 || len > __NEW_UTS_LEN)
1475                return -EINVAL;
1476
1477        down_write(&uts_sem);
1478        errno = -EFAULT;
1479        if (!copy_from_user(tmp, name, len)) {
1480                struct new_utsname *u = utsname();
1481
1482                memcpy(u->domainname, tmp, len);
1483                memset(u->domainname + len, 0, sizeof(u->domainname) - len);
1484                errno = 0;
1485        }
1486        up_write(&uts_sem);
1487        return errno;
1488}
1489
1490SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1491{
1492        if (resource >= RLIM_NLIMITS)
1493                return -EINVAL;
1494        else {
1495                struct rlimit value;
1496                task_lock(current->group_leader);
1497                value = current->signal->rlim[resource];
1498                task_unlock(current->group_leader);
1499                return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1500        }
1501}
1502
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 *      Back compatibility for getrlimit. Needed for some apps.
 *
 * Same as getrlimit(), except that rlim_cur and rlim_max are each
 * clamped to 0x7FFFFFFF before being copied out.
 */

SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
                struct rlimit __user *, rlim)
{
        struct rlimit limit;

        if (resource >= RLIM_NLIMITS)
                return -EINVAL;

        task_lock(current->group_leader);
        limit = current->signal->rlim[resource];
        task_unlock(current->group_leader);

        if (limit.rlim_cur > 0x7FFFFFFF)
                limit.rlim_cur = 0x7FFFFFFF;
        if (limit.rlim_max > 0x7FFFFFFF)
                limit.rlim_max = 0x7FFFFFFF;

        if (copy_to_user(rlim, &limit, sizeof(limit)))
                return -EFAULT;
        return 0;
}

#endif
1527
1528SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1529{
1530        struct rlimit new_rlim, *old_rlim;
1531        int retval;
1532
1533        if (resource >= RLIM_NLIMITS)
1534                return -EINVAL;
1535        if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1536                return -EFAULT;
1537        if (new_rlim.rlim_cur > new_rlim.rlim_max)
1538                return -EINVAL;
1539        old_rlim = current->signal->rlim + resource;
1540        if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1541            !capable(CAP_SYS_RESOURCE))
1542                return -EPERM;
1543        if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
1544                return -EPERM;
1545
1546        retval = security_task_setrlimit(resource, &new_rlim);
1547        if (retval)
1548                return retval;
1549
1550        if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
1551                /*
1552                 * The caller is asking for an immediate RLIMIT_CPU
1553                 * expiry.  But we use the zero value to mean "it was
1554                 * never set".  So let's cheat and make it one second
1555                 * instead
1556                 */
1557                new_rlim.rlim_cur = 1;
1558        }
1559
1560        task_lock(current->group_leader);
1561        *old_rlim = new_rlim;
1562        task_unlock(current->group_leader);
1563
1564        if (resource != RLIMIT_CPU)
1565                goto out;
1566
1567        /*
1568         * RLIMIT_CPU handling.   Note that the kernel fails to return an error
1569         * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
1570         * very long-standing error, and fixing it now risks breakage of
1571         * applications, so we live with it
1572         */
1573        if (new_rlim.rlim_cur == RLIM_INFINITY)
1574                goto out;
1575
1576        update_rlimit_cpu(new_rlim.rlim_cur);
1577out:
1578        return 0;
1579}
1580
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded,
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current
 * and single threaded, as no one else can take our signal_struct away, no
 * one else can reap the children to update signal->c* counters, and no one
 * else can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 *
 */
1613
1614static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
1615{
1616        r->ru_nvcsw += t->nvcsw;
1617        r->ru_nivcsw += t->nivcsw;
1618        r->ru_minflt += t->min_flt;
1619        r->ru_majflt += t->maj_flt;
1620        r->ru_inblock += task_io_get_inblock(t);
1621        r->ru_oublock += task_io_get_oublock(t);
1622}
1623
1624static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1625{
1626        struct task_struct *t;
1627        unsigned long flags;
1628        cputime_t utime, stime;
1629        struct task_cputime cputime;
1630
1631        memset((char *) r, 0, sizeof *r);
1632        utime = stime = cputime_zero;
1633
1634        if (who == RUSAGE_THREAD) {
1635                utime = task_utime(current);
1636                stime = task_stime(current);
1637                accumulate_thread_rusage(p, r);
1638                goto out;
1639        }
1640
1641        if (!lock_task_sighand(p, &flags))
1642                return;
1643
1644        switch (who) {
1645                case RUSAGE_BOTH:
1646                case RUSAGE_CHILDREN:
1647                        utime = p->signal->cutime;
1648                        stime = p->signal->cstime;
1649                        r->ru_nvcsw = p->signal->cnvcsw;
1650                        r->ru_nivcsw = p->signal->cnivcsw;
1651                        r->ru_minflt = p->signal->cmin_flt;
1652                        r->ru_majflt = p->signal->cmaj_flt;
1653                        r->ru_inblock = p->signal->cinblock;
1654                        r->ru_oublock = p->signal->coublock;
1655
1656                        if (who == RUSAGE_CHILDREN)
1657                                break;
1658
1659                case RUSAGE_SELF:
1660                        thread_group_cputime(p, &cputime);
1661                        utime = cputime_add(utime, cputime.utime);
1662                        stime = cputime_add(stime, cputime.stime);
1663                        r->ru_nvcsw += p->signal->nvcsw;
1664                        r->ru_nivcsw += p->signal->nivcsw;
1665                        r->ru_minflt += p->signal->min_flt;
1666                        r->ru_majflt += p->signal->maj_flt;
1667                        r->ru_inblock += p->signal->inblock;
1668                        r->ru_oublock += p->signal->oublock;
1669                        t = p;
1670                        do {
1671                                accumulate_thread_rusage(t, r);
1672                                t = next_thread(t);
1673                        } while (t != p);
1674                        break;
1675
1676                default:
1677                        BUG();
1678        }
1679        unlock_task_sighand(p, &flags);
1680
1681out:
1682        cputime_to_timeval(utime, &r->ru_utime);
1683        cputime_to_timeval(stime, &r->ru_stime);
1684}
1685
1686int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1687{
1688        struct rusage r;
1689        k_getrusage(p, who, &r);
1690        return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1691}
1692
1693SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1694{
1695        if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1696            who != RUSAGE_THREAD)
1697                return -EINVAL;
1698        return getrusage(current, who, ru);
1699}
1700
1701SYSCALL_DEFINE1(umask, int, mask)
1702{
1703        mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1704        return mask;
1705}
1706
1707SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1708                unsigned long, arg4, unsigned long, arg5)
1709{
1710        struct task_struct *me = current;
1711        unsigned char comm[sizeof(me->comm)];
1712        long error;
1713
1714        error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1715        if (error != -ENOSYS)
1716                return error;
1717
1718        error = 0;
1719        switch (option) {
1720                case PR_SET_PDEATHSIG:
1721                        if (!valid_signal(arg2)) {
1722                                error = -EINVAL;
1723                                break;
1724                        }
1725                        me->pdeath_signal = arg2;
1726                        error = 0;
1727                        break;
1728                case PR_GET_PDEATHSIG:
1729                        error = put_user(me->pdeath_signal, (int __user *)arg2);
1730                        break;
1731                case PR_GET_DUMPABLE:
1732                        error = get_dumpable(me->mm);
1733                        break;
1734                case PR_SET_DUMPABLE:
1735                        if (arg2 < 0 || arg2 > 1) {
1736                                error = -EINVAL;
1737                                break;
1738                        }
1739                        set_dumpable(me->mm, arg2);
1740                        error = 0;
1741                        break;
1742
1743                case PR_SET_UNALIGN:
1744                        error = SET_UNALIGN_CTL(me, arg2);
1745                        break;
1746                case PR_GET_UNALIGN:
1747                        error = GET_UNALIGN_CTL(me, arg2);
1748                        break;
1749                case PR_SET_FPEMU:
1750                        error = SET_FPEMU_CTL(me, arg2);
1751                        break;
1752                case PR_GET_FPEMU:
1753                        error = GET_FPEMU_CTL(me, arg2);
1754                        break;
1755                case PR_SET_FPEXC:
1756                        error = SET_FPEXC_CTL(me, arg2);
1757                        break;
1758                case PR_GET_FPEXC:
1759                        error = GET_FPEXC_CTL(me, arg2);
1760                        break;
1761                case PR_GET_TIMING:
1762                        error = PR_TIMING_STATISTICAL;
1763                        break;
1764                case PR_SET_TIMING:
1765                        if (arg2 != PR_TIMING_STATISTICAL)
1766                                error = -EINVAL;
1767                        else
1768                                error = 0;
1769                        break;
1770
1771                case PR_SET_NAME:
1772                        comm[sizeof(me->comm)-1] = 0;
1773                        if (strncpy_from_user(comm, (char __user *)arg2,
1774                                              sizeof(me->comm) - 1) < 0)
1775                                return -EFAULT;
1776                        set_task_comm(me, comm);
1777                        return 0;
1778                case PR_GET_NAME:
1779                        get_task_comm(comm, me);
1780                        if (copy_to_user((char __user *)arg2, comm,
1781                                         sizeof(comm)))
1782                                return -EFAULT;
1783                        return 0;
1784                case PR_GET_ENDIAN:
1785                        error = GET_ENDIAN(me, arg2);
1786                        break;
1787                case PR_SET_ENDIAN:
1788                        error = SET_ENDIAN(me, arg2);
1789                        break;
1790
1791                case PR_GET_SECCOMP:
1792                        error = prctl_get_seccomp();
1793                        break;
1794                case PR_SET_SECCOMP:
1795                        error = prctl_set_seccomp(arg2);
1796                        break;
1797                case PR_GET_TSC:
1798                        error = GET_TSC_CTL(arg2);
1799                        break;
1800                case PR_SET_TSC:
1801                        error = SET_TSC_CTL(arg2);
1802                        break;
1803                case PR_GET_TIMERSLACK:
1804                        error = current->timer_slack_ns;
1805                        break;
1806                case PR_SET_TIMERSLACK:
1807                        if (arg2 <= 0)
1808                                current->timer_slack_ns =
1809                                        current->default_timer_slack_ns;
1810                        else
1811                                current->timer_slack_ns = arg2;
1812                        error = 0;
1813                        break;
1814                default:
1815                        error = -EINVAL;
1816                        break;
1817        }
1818        return error;
1819}
1820
1821SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
1822                struct getcpu_cache __user *, unused)
1823{
1824        int err = 0;
1825        int cpu = raw_smp_processor_id();
1826        if (cpup)
1827                err |= put_user(cpu, cpup);
1828        if (nodep)
1829                err |= put_user(cpu_to_node(cpu), nodep);
1830        return err ? -EFAULT : 0;
1831}
1832
/*
 * Command line that orderly_poweroff() splits into an argv and hands to
 * the usermode helper.
 */
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
1834
/*
 * Cleanup callback registered by orderly_poweroff(): frees the argv built
 * by argv_split().  @envp is not touched (orderly_poweroff() passes a
 * static array).
 */
static void argv_cleanup(char **argv, char **envp)
{
        argv_free(argv);
}
1839
1840/**
1841 * orderly_poweroff - Trigger an orderly system poweroff
1842 * @force: force poweroff if command execution fails
1843 *
1844 * This may be called from any context to trigger a system shutdown.
1845 * If the orderly shutdown fails, it will force an immediate shutdown.
1846 */
1847int orderly_poweroff(bool force)
1848{
1849        int argc;
1850        char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
1851        static char *envp[] = {
1852                "HOME=/",
1853                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
1854                NULL
1855        };
1856        int ret = -ENOMEM;
1857        struct subprocess_info *info;
1858
1859        if (argv == NULL) {
1860                printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
1861                       __func__, poweroff_cmd);
1862                goto out;
1863        }
1864
1865        info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
1866        if (info == NULL) {
1867                argv_free(argv);
1868                goto out;
1869        }
1870
1871        call_usermodehelper_setcleanup(info, argv_cleanup);
1872
1873        ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
1874
1875  out:
1876        if (ret && force) {
1877                printk(KERN_WARNING "Failed to start orderly shutdown: "
1878                       "forcing the issue\n");
1879
1880                /* I guess this should try to kick off some daemon to
1881                   sync and poweroff asap.  Or not even bother syncing
1882                   if we're doing an emergency shutdown? */
1883                emergency_sync();
1884                kernel_power_off();
1885        }
1886
1887        return ret;
1888}
1889EXPORT_SYMBOL_GPL(orderly_poweroff);
1890
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.