linux-bk/kernel/acct.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/acct.c
   3 *
   4 *  BSD Process Accounting for Linux
   5 *
   6 *  Author: Marco van Wieringen <mvw@planets.elm.net>
   7 *
   8 *  Some code based on ideas and code from:
   9 *  Thomas K. Dyas <tdyas@eden.rutgers.edu>
  10 *
  11 *  This file implements BSD-style process accounting. Whenever any
  12 *  process exits, an accounting record of type "struct acct" is
  13 *  written to the file specified with the acct() system call. It is
  14 *  up to user-level programs to do useful things with the accounting
  15 *  log. The kernel just provides the raw accounting information.
  16 *
  17 * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
  18 *
  19 *  Plugged two leaks. 1) It didn't return acct_file into the free_filps if
  20 *  the file happened to be read-only. 2) If the accounting was suspended
  21 *  due to the lack of space it happily allowed to reopen it and completely
  22 *  lost the old acct_file. 3/10/98, Al Viro.
  23 *
  24 *  Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
  25 *  XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
  26 *
   27 *  Fixed a nasty interaction with sys_umount(). If the accounting
   28 *  was suspended we failed to stop it on umount(). Messy.
  29 *  Another one: remount to readonly didn't stop accounting.
  30 *      Question: what should we do if we have CAP_SYS_ADMIN but not
  31 *  CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
  32 *  unless we are messing with the root. In that case we are getting a
  33 *  real mess with do_remount_sb(). 9/11/98, AV.
  34 *
  35 *  Fixed a bunch of races (and pair of leaks). Probably not the best way,
  36 *  but this one obviously doesn't introduce deadlocks. Later. BTW, found
  37 *  one race (and leak) in BSD implementation.
  38 *  OK, that's better. ANOTHER race and leak in BSD variant. There always
  39 *  is one more bug... 10/11/98, AV.
  40 *
  41 *      Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
  42 * ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
  43 * a struct file opened for write. Fixed. 2/6/2000, AV.
  44 */
  45
  46#include <linux/config.h>
  47#include <linux/mm.h>
  48#include <linux/slab.h>
  49#include <linux/acct.h>
  50#include <linux/file.h>
  51#include <linux/tty.h>
  52#include <asm/uaccess.h>
  53
  54/*
  55 * These constants control the amount of freespace that suspend and
  56 * resume the process accounting system, and the time delay between
  57 * each check.
  58 * Turned into sysctl-controllable parameters. AV, 12/11/98
  59 */
  60
   61int acct_parm[3] = {4, 2, 30};     /* { resume %, suspend %, seconds between checks } */
   62#define RESUME          (acct_parm[0])  /* resume when available blocks >= this % of total blocks */
   63#define SUSPEND         (acct_parm[1])  /* suspend when available blocks <= this % of total blocks */
   64#define ACCT_TIMEOUT    (acct_parm[2])  /* seconds between free-space checks (multiplied by HZ for the timer) */
  65
  66/*
  67 * External references and all of the globals.
  68 */
   69static void do_acct_process(long, struct file *);  /* forward declaration: acct_file_reopen() calls it before the definition */
  70
  71/*
  72 * This structure is used so that all the data protected by lock
  73 * can be placed in the same cache line as the lock.  This primes
  74 * the cache line to have the data after getting the lock.
  75 */
   76struct acct_glbs {
   77        spinlock_t              lock;           /* taken around updates of active/file/timer; acct_timeout() sets needcheck without it */
   78        volatile int            active;         /* 1 while records are written, 0 while paused or switched off */
   79        volatile int            needcheck;      /* set by acct_timeout(); makes check_free_space() re-run vfs_statfs() */
   80        struct file             *file;          /* current accounting file, NULL when accounting is off */
   81        struct timer_list       timer;          /* fires acct_timeout() ACCT_TIMEOUT seconds after being armed */
   82};
   83
   84static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};  /* only .lock is initialised explicitly; the other members start out zeroed */
  85
   86/*
   87 * Timer callback: request a fresh free-space check; the argument is ignored.
   88 */
   89static void acct_timeout(unsigned long unused)
   90{
   91        acct_globals.needcheck = 1;     /* tested and then cleared in check_free_space() */
   92}
  93
  94/*
  95 * Check the amount of free space and suspend/resume accordingly.
  96 */
  97static int check_free_space(struct file *file)
  98{
  99        struct statfs sbuf;
 100        int res;
 101        int act;
 102
 103        spin_lock(&acct_globals.lock);
 104        res = acct_globals.active;
 105        if (!file || !acct_globals.needcheck)
 106                goto out;
 107        spin_unlock(&acct_globals.lock);
 108
 109        /* May block */
 110        if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
 111                return res;
 112
 113        if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100)
 114                act = -1;
 115        else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100)
 116                act = 1;
 117        else
 118                act = 0;
 119
 120        /*
 121         * If some joker switched acct_globals.file under us we'ld better be
 122         * silent and _not_ touch anything.
 123         */
 124        spin_lock(&acct_globals.lock);
 125        if (file != acct_globals.file) {
 126                if (act)
 127                        res = act>0;
 128                goto out;
 129        }
 130
 131        if (acct_globals.active) {
 132                if (act < 0) {
 133                        acct_globals.active = 0;
 134                        printk(KERN_INFO "Process accounting paused\n");
 135                }
 136        } else {
 137                if (act > 0) {
 138                        acct_globals.active = 1;
 139                        printk(KERN_INFO "Process accounting resumed\n");
 140                }
 141        }
 142
 143        del_timer(&acct_globals.timer);
 144        acct_globals.needcheck = 0;
 145        acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
 146        add_timer(&acct_globals.timer);
 147        res = acct_globals.active;
 148out:
 149        spin_unlock(&acct_globals.lock);
 150        return res;
 151}
 152
 153/*
 154 * Close the old accouting file (if currently open) and then replace
 155 * it with file (if non-NULL).
 156 *
 157 * NOTE: acct_globals.lock MUST be held on entry and exit.
 158 */
 159void acct_file_reopen(struct file *file)
 160{
 161        struct file *old_acct = NULL;
 162
 163        if (acct_globals.file) {
 164                old_acct = acct_globals.file;
 165                del_timer(&acct_globals.timer);
 166                acct_globals.active = 0;
 167                acct_globals.needcheck = 0;
 168                acct_globals.file = NULL;
 169        }
 170        if (file) {
 171                acct_globals.file = file;
 172                acct_globals.needcheck = 0;
 173                acct_globals.active = 1;
 174                /* It's been deleted if it was used before so this is safe */
 175                init_timer(&acct_globals.timer);
 176                acct_globals.timer.function = acct_timeout;
 177                acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
 178                add_timer(&acct_globals.timer);
 179        }
 180        if (old_acct) {
 181                spin_unlock(&acct_globals.lock);
 182                do_acct_process(0, old_acct);
 183                filp_close(old_acct, NULL);
 184                spin_lock(&acct_globals.lock);
 185        }
 186}
 187
 188/*
 189 *  sys_acct() is the only system call needed to implement process
 190 *  accounting. It takes the name of the file where accounting records
 191 *  should be written. If the filename is NULL, accounting will be
 192 *  shutdown.
 193 */
 194asmlinkage long sys_acct(const char *name)
 195{
 196        struct file *file = NULL;
 197        char *tmp;
 198        int error;
 199
 200        if (!capable(CAP_SYS_PACCT))
 201                return -EPERM;
 202
 203        if (name) {
 204                tmp = getname(name);
 205                if (IS_ERR(tmp)) {
 206                        return (PTR_ERR(tmp));
 207                }
 208                /* Difference from BSD - they don't do O_APPEND */
 209                file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
 210                putname(tmp);
 211                if (IS_ERR(file)) {
 212                        return (PTR_ERR(file));
 213                }
 214                if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
 215                        filp_close(file, NULL);
 216                        return (-EACCES);
 217                }
 218
 219                if (!file->f_op->write) {
 220                        filp_close(file, NULL);
 221                        return (-EIO);
 222                }
 223        }
 224
 225        error = security_ops->acct(file);
 226        if (error)
 227                return error;
 228
 229        spin_lock(&acct_globals.lock);
 230        acct_file_reopen(file);
 231        spin_unlock(&acct_globals.lock);
 232
 233        return (0);
 234}
 235
 236/*
 237 * If the accouting is turned on for a file in the filesystem pointed
 238 * to by sb, turn accouting off.
 239 */
 240void acct_auto_close(struct super_block *sb)
 241{
 242        spin_lock(&acct_globals.lock);
 243        if (acct_globals.file &&
 244            acct_globals.file->f_dentry->d_inode->i_sb == sb) {
 245                acct_file_reopen((struct file *)NULL);
 246        }
 247        spin_unlock(&acct_globals.lock);
 248}
 249
 250/*
 251 *  encode an unsigned long into a comp_t
 252 *
 253 *  This routine has been adopted from the encode_comp_t() function in
 254 *  the kern_acct.c file of the FreeBSD operating system. The encoding
 255 *  is a 13-bit fraction with a 3-bit (base 8) exponent.
 256 */
 257
 258#define MANTSIZE        13                      /* 13 bit mantissa. */
 259#define EXPSIZE         3                       /* Base 8 (3 bit) exponent. */
 260#define MAXFRACT        ((1 << MANTSIZE) - 1)   /* Maximum fractional value. */
 261
 262static comp_t encode_comp_t(unsigned long value)
 263{
 264        int exp, rnd;
 265
 266        exp = rnd = 0;
 267        while (value > MAXFRACT) {
 268                rnd = value & (1 << (EXPSIZE - 1));     /* Round up? */
 269                value >>= EXPSIZE;      /* Base 8 exponent == 3 bit shift. */
 270                exp++;
 271        }
 272
 273        /*
 274         * If we need to round up, do it (and handle overflow correctly).
 275         */
 276        if (rnd && (++value > MAXFRACT)) {
 277                value >>= EXPSIZE;
 278                exp++;
 279        }
 280
 281        /*
 282         * Clean it up and polish it off.
 283         */
 284        exp <<= MANTSIZE;               /* Shift the exponent into place */
 285        exp += value;                   /* and add on the mantissa. */
 286        return exp;
 287}
 288
 289/*
 290 *  Write an accounting entry for an exiting process
 291 *
 292 *  The acct_process() call is the workhorse of the process
 293 *  accounting system. The struct acct is built here and then written
 294 *  into the accounting file. This function should only be called from
 295 *  do_exit().
 296 */
 297
 298/*
 299 *  do_acct_process does all actual work. Caller holds the reference to file.
 300 */
 301static void do_acct_process(long exitcode, struct file *file)
 302{
 303        struct acct ac;
 304        mm_segment_t fs;
 305        unsigned long vsize;
 306        unsigned long flim;
 307
 308        /*
 309         * First check to see if there is enough free_space to continue
 310         * the process accounting system.
 311         */
 312        if (!check_free_space(file))
 313                return;
 314
 315        /*
 316         * Fill the accounting struct with the needed info as recorded
 317         * by the different kernel functions.
 318         */
 319        memset((caddr_t)&ac, 0, sizeof(struct acct));
 320
 321        strncpy(ac.ac_comm, current->comm, ACCT_COMM);
 322        ac.ac_comm[ACCT_COMM - 1] = '\0';
 323
 324        ac.ac_btime = CT_TO_SECS(current->start_time) +
 325                (xtime.tv_sec - (jiffies / HZ));
 326        ac.ac_etime = encode_comp_t(jiffies - current->start_time);
 327        ac.ac_utime = encode_comp_t(current->utime);
 328        ac.ac_stime = encode_comp_t(current->stime);
 329        ac.ac_uid = current->uid;
 330        ac.ac_gid = current->gid;
 331        ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0;
 332
 333        ac.ac_flag = 0;
 334        if (current->flags & PF_FORKNOEXEC)
 335                ac.ac_flag |= AFORK;
 336        if (current->flags & PF_SUPERPRIV)
 337                ac.ac_flag |= ASU;
 338        if (current->flags & PF_DUMPCORE)
 339                ac.ac_flag |= ACORE;
 340        if (current->flags & PF_SIGNALED)
 341                ac.ac_flag |= AXSIG;
 342
 343        vsize = 0;
 344        if (current->mm) {
 345                struct vm_area_struct *vma;
 346                down_read(&current->mm->mmap_sem);
 347                vma = current->mm->mmap;
 348                while (vma) {
 349                        vsize += vma->vm_end - vma->vm_start;
 350                        vma = vma->vm_next;
 351                }
 352                up_read(&current->mm->mmap_sem);
 353        }
 354        vsize = vsize / 1024;
 355        ac.ac_mem = encode_comp_t(vsize);
 356        ac.ac_io = encode_comp_t(0 /* current->io_usage */);    /* %% */
 357        ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
 358        ac.ac_minflt = encode_comp_t(current->min_flt);
 359        ac.ac_majflt = encode_comp_t(current->maj_flt);
 360        ac.ac_swaps = encode_comp_t(current->nswap);
 361        ac.ac_exitcode = exitcode;
 362
 363        /*
 364         * Kernel segment override to datasegment and write it
 365         * to the accounting file.
 366         */
 367        fs = get_fs();
 368        set_fs(KERNEL_DS);
 369        /*
 370         * Accounting records are not subject to resource limits.
 371         */
 372        flim = current->rlim[RLIMIT_FSIZE].rlim_cur;
 373        current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
 374        file->f_op->write(file, (char *)&ac,
 375                               sizeof(struct acct), &file->f_pos);
 376        current->rlim[RLIMIT_FSIZE].rlim_cur = flim;
 377        set_fs(fs);
 378}
 379
 380/*
 381 * acct_process - now just a wrapper around do_acct_process
 382 */
 383int acct_process(long exitcode)
 384{
 385        struct file *file = NULL;
 386        spin_lock(&acct_globals.lock);
 387        if (acct_globals.file) {
 388                file = acct_globals.file;
 389                get_file(file);
 390                spin_unlock(&acct_globals.lock);
 391                do_acct_process(exitcode, file);
 392                fput(file);
 393        } else
 394                spin_unlock(&acct_globals.lock);
 395        return 0;
 396}
 397
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.