linux-bk/kernel/timer.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/timer.c
   3 *
   4 *  Kernel internal timers, kernel timekeeping, basic process system calls
   5 *
   6 *  Copyright (C) 1991, 1992  Linus Torvalds
   7 *
   8 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
   9 *
  10 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
  11 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
  12 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
  13 *              serialize accesses to xtime/lost_ticks).
  14 *                              Copyright (C) 1998  Andrea Arcangeli
  15 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
  16 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
  17 */
  18
  19#include <linux/config.h>
  20#include <linux/mm.h>
  21#include <linux/timex.h>
  22#include <linux/delay.h>
  23#include <linux/smp_lock.h>
  24#include <linux/interrupt.h>
  25#include <linux/tqueue.h>
  26#include <linux/kernel_stat.h>
  27
  28#include <asm/uaccess.h>
  29
  30struct kernel_stat kstat;
  31
  32/*
  33 * Timekeeping variables
  34 */
  35
  36unsigned long tick_usec = TICK_USEC;            /* ACTHZ          period (usec) */
  37unsigned long tick_nsec = TICK_NSEC(TICK_USEC); /* USER_HZ period (nsec) */
  38
  39/* The current time */
  40struct timespec xtime __attribute__ ((aligned (16)));
  41
  42/* Don't completely fail for HZ > 500.  */
  43int tickadj = 500/HZ ? : 1;             /* microsecs */
  44
  45DECLARE_TASK_QUEUE(tq_timer);
  46DECLARE_TASK_QUEUE(tq_immediate);
  47
  48/*
  49 * phase-lock loop variables
  50 */
  51/* TIME_ERROR prevents overwriting the CMOS clock */
  52int time_state = TIME_OK;               /* clock synchronization status */
  53int time_status = STA_UNSYNC;           /* clock status bits            */
  54long time_offset;                       /* time adjustment (us)         */
  55long time_constant = 2;                 /* pll time constant            */
  56long time_tolerance = MAXFREQ;          /* frequency tolerance (ppm)    */
  57long time_precision = 1;                /* clock precision (us)         */
  58long time_maxerror = NTP_PHASE_LIMIT;   /* maximum error (us)           */
  59long time_esterror = NTP_PHASE_LIMIT;   /* estimated error (us)         */
  60long time_phase;                        /* phase offset (scaled us)     */
  61long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
  62                                        /* frequency offset (scaled ppm)*/
  63long time_adj;                          /* tick adjust (scaled 1 / HZ)  */
  64long time_reftime;                      /* time at last adjustment (s)  */
  65
  66long time_adjust;
  67
  68unsigned long event;
  69
  70extern int do_setitimer(int, struct itimerval *, struct itimerval *);
  71
  72/*
  73 * The 64-bit jiffies value is not atomic - you MUST NOT read it
  74 * without holding read_lock_irq(&xtime_lock).
  75 * jiffies is defined in the linker script...
  76 */
  77
  78
  79unsigned int * prof_buffer;
  80unsigned long prof_len;
  81unsigned long prof_shift;
  82
  83/*
  84 * Event timer code
  85 */
  86#define TVN_BITS 6
  87#define TVR_BITS 8
  88#define TVN_SIZE (1 << TVN_BITS)
  89#define TVR_SIZE (1 << TVR_BITS)
  90#define TVN_MASK (TVN_SIZE - 1)
  91#define TVR_MASK (TVR_SIZE - 1)
  92
  93struct timer_vec {
  94        int index;
  95        struct list_head vec[TVN_SIZE];
  96};
  97
  98struct timer_vec_root {
  99        int index;
 100        struct list_head vec[TVR_SIZE];
 101};
 102
 103static struct timer_vec tv5;
 104static struct timer_vec tv4;
 105static struct timer_vec tv3;
 106static struct timer_vec tv2;
 107static struct timer_vec_root tv1;
 108
 109static struct timer_vec * const tvecs[] = {
 110        (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
 111};
 112
 113#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
 114
 115void init_timervecs (void)
 116{
 117        int i;
 118
 119        for (i = 0; i < TVN_SIZE; i++) {
 120                INIT_LIST_HEAD(tv5.vec + i);
 121                INIT_LIST_HEAD(tv4.vec + i);
 122                INIT_LIST_HEAD(tv3.vec + i);
 123                INIT_LIST_HEAD(tv2.vec + i);
 124        }
 125        for (i = 0; i < TVR_SIZE; i++)
 126                INIT_LIST_HEAD(tv1.vec + i);
 127}
 128
 129static unsigned long timer_jiffies;
 130
 131static inline void internal_add_timer(struct timer_list *timer)
 132{
 133        /*
 134         * must be cli-ed when calling this
 135         */
 136        unsigned long expires = timer->expires;
 137        unsigned long idx = expires - timer_jiffies;
 138        struct list_head * vec;
 139
 140        if (idx < TVR_SIZE) {
 141                int i = expires & TVR_MASK;
 142                vec = tv1.vec + i;
 143        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
 144                int i = (expires >> TVR_BITS) & TVN_MASK;
 145                vec = tv2.vec + i;
 146        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
 147                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
 148                vec =  tv3.vec + i;
 149        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
 150                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
 151                vec = tv4.vec + i;
 152        } else if ((signed long) idx < 0) {
 153                /* can happen if you add a timer with expires == jiffies,
 154                 * or you set a timer to go off in the past
 155                 */
 156                vec = tv1.vec + tv1.index;
 157        } else if (idx <= 0xffffffffUL) {
 158                int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
 159                vec = tv5.vec + i;
 160        } else {
 161                /* Can only get here on architectures with 64-bit jiffies */
 162                INIT_LIST_HEAD(&timer->list);
 163                return;
 164        }
 165        /*
 166         * Timers are FIFO!
 167         */
 168        list_add(&timer->list, vec->prev);
 169}
 170
 171/* Initialize both explicitly - let's try to have them in the same cache line */
 172spinlock_t timerlist_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 173
 174#ifdef CONFIG_SMP
 175volatile struct timer_list * volatile running_timer;
 176#define timer_enter(t) do { running_timer = t; mb(); } while (0)
 177#define timer_exit() do { running_timer = NULL; } while (0)
 178#define timer_is_running(t) (running_timer == t)
 179#define timer_synchronize(t) while (timer_is_running(t)) barrier()
 180#else
 181#define timer_enter(t)          do { } while (0)
 182#define timer_exit()            do { } while (0)
 183#endif
 184
 185void add_timer(struct timer_list *timer)
 186{
 187        unsigned long flags;
 188
 189        spin_lock_irqsave(&timerlist_lock, flags);
 190        if (unlikely(timer_pending(timer)))
 191                goto bug;
 192        internal_add_timer(timer);
 193        spin_unlock_irqrestore(&timerlist_lock, flags);
 194        return;
 195bug:
 196        spin_unlock_irqrestore(&timerlist_lock, flags);
 197        printk(KERN_ERR "BUG: kernel timer added twice at %p.\n",
 198                        __builtin_return_address(0));
 199}
 200
 201static inline int detach_timer (struct timer_list *timer)
 202{
 203        if (!timer_pending(timer))
 204                return 0;
 205        list_del(&timer->list);
 206        return 1;
 207}
 208
 209int mod_timer(struct timer_list *timer, unsigned long expires)
 210{
 211        int ret;
 212        unsigned long flags;
 213
 214        spin_lock_irqsave(&timerlist_lock, flags);
 215        timer->expires = expires;
 216        ret = detach_timer(timer);
 217        internal_add_timer(timer);
 218        spin_unlock_irqrestore(&timerlist_lock, flags);
 219        return ret;
 220}
 221
 222int del_timer(struct timer_list * timer)
 223{
 224        int ret;
 225        unsigned long flags;
 226
 227        spin_lock_irqsave(&timerlist_lock, flags);
 228        ret = detach_timer(timer);
 229        timer->list.next = timer->list.prev = NULL;
 230        spin_unlock_irqrestore(&timerlist_lock, flags);
 231        return ret;
 232}
 233
 234#ifdef CONFIG_SMP
 235/*
 236 * SMP specific function to delete periodic timer.
 237 * Caller must disable by some means restarting the timer
 238 * for new. Upon exit the timer is not queued and handler is not running
 239 * on any CPU. It returns number of times, which timer was deleted
 240 * (for reference counting).
 241 */
 242
 243int del_timer_sync(struct timer_list * timer)
 244{
 245        int ret = 0;
 246
 247        for (;;) {
 248                unsigned long flags;
 249                int running;
 250
 251                spin_lock_irqsave(&timerlist_lock, flags);
 252                ret += detach_timer(timer);
 253                timer->list.next = timer->list.prev = 0;
 254                running = timer_is_running(timer);
 255                spin_unlock_irqrestore(&timerlist_lock, flags);
 256
 257                if (!running)
 258                        break;
 259
 260                timer_synchronize(timer);
 261        }
 262
 263        return ret;
 264}
 265#endif
 266
 267
 268static inline void cascade_timers(struct timer_vec *tv)
 269{
 270        /* cascade all the timers from tv up one level */
 271        struct list_head *head, *curr, *next;
 272
 273        head = tv->vec + tv->index;
 274        curr = head->next;
 275        /*
 276         * We are removing _all_ timers from the list, so we don't  have to
 277         * detach them individually, just clear the list afterwards.
 278         */
 279        while (curr != head) {
 280                struct timer_list *tmp;
 281
 282                tmp = list_entry(curr, struct timer_list, list);
 283                next = curr->next;
 284                list_del(curr); // not needed
 285                internal_add_timer(tmp);
 286                curr = next;
 287        }
 288        INIT_LIST_HEAD(head);
 289        tv->index = (tv->index + 1) & TVN_MASK;
 290}
 291
 292static inline void run_timer_list(void)
 293{
 294        spin_lock_irq(&timerlist_lock);
 295        while ((long)(jiffies - timer_jiffies) >= 0) {
 296                struct list_head *head, *curr;
 297                if (!tv1.index) {
 298                        int n = 1;
 299                        do {
 300                                cascade_timers(tvecs[n]);
 301                        } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
 302                }
 303repeat:
 304                head = tv1.vec + tv1.index;
 305                curr = head->next;
 306                if (curr != head) {
 307                        struct timer_list *timer;
 308                        void (*fn)(unsigned long);
 309                        unsigned long data;
 310
 311                        timer = list_entry(curr, struct timer_list, list);
 312                        fn = timer->function;
 313                        data= timer->data;
 314
 315                        detach_timer(timer);
 316                        timer->list.next = timer->list.prev = NULL;
 317                        timer_enter(timer);
 318                        spin_unlock_irq(&timerlist_lock);
 319                        fn(data);
 320                        spin_lock_irq(&timerlist_lock);
 321                        timer_exit();
 322                        goto repeat;
 323                }
 324                ++timer_jiffies; 
 325                tv1.index = (tv1.index + 1) & TVR_MASK;
 326        }
 327        spin_unlock_irq(&timerlist_lock);
 328}
 329
 330spinlock_t tqueue_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 331
 332void tqueue_bh(void)
 333{
 334        run_task_queue(&tq_timer);
 335}
 336
 337void immediate_bh(void)
 338{
 339        run_task_queue(&tq_immediate);
 340}
 341
 342/*
 343 * this routine handles the overflow of the microsecond field
 344 *
 345 * The tricky bits of code to handle the accurate clock support
 346 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 347 * They were originally developed for SUN and DEC kernels.
 348 * All the kudos should go to Dave for this stuff.
 349 *
 350 */
 351static void second_overflow(void)
 352{
 353    long ltemp;
 354
 355    /* Bump the maxerror field */
 356    time_maxerror += time_tolerance >> SHIFT_USEC;
 357    if ( time_maxerror > NTP_PHASE_LIMIT ) {
 358        time_maxerror = NTP_PHASE_LIMIT;
 359        time_status |= STA_UNSYNC;
 360    }
 361
 362    /*
 363     * Leap second processing. If in leap-insert state at
 364     * the end of the day, the system clock is set back one
 365     * second; if in leap-delete state, the system clock is
 366     * set ahead one second. The microtime() routine or
 367     * external clock driver will insure that reported time
 368     * is always monotonic. The ugly divides should be
 369     * replaced.
 370     */
 371    switch (time_state) {
 372
 373    case TIME_OK:
 374        if (time_status & STA_INS)
 375            time_state = TIME_INS;
 376        else if (time_status & STA_DEL)
 377            time_state = TIME_DEL;
 378        break;
 379
 380    case TIME_INS:
 381        if (xtime.tv_sec % 86400 == 0) {
 382            xtime.tv_sec--;
 383            time_state = TIME_OOP;
 384            printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
 385        }
 386        break;
 387
 388    case TIME_DEL:
 389        if ((xtime.tv_sec + 1) % 86400 == 0) {
 390            xtime.tv_sec++;
 391            time_state = TIME_WAIT;
 392            printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
 393        }
 394        break;
 395
 396    case TIME_OOP:
 397        time_state = TIME_WAIT;
 398        break;
 399
 400    case TIME_WAIT:
 401        if (!(time_status & (STA_INS | STA_DEL)))
 402            time_state = TIME_OK;
 403    }
 404
 405    /*
 406     * Compute the phase adjustment for the next second. In
 407     * PLL mode, the offset is reduced by a fixed factor
 408     * times the time constant. In FLL mode the offset is
 409     * used directly. In either mode, the maximum phase
 410     * adjustment for each second is clamped so as to spread
 411     * the adjustment over not more than the number of
 412     * seconds between updates.
 413     */
 414    if (time_offset < 0) {
 415        ltemp = -time_offset;
 416        if (!(time_status & STA_FLL))
 417            ltemp >>= SHIFT_KG + time_constant;
 418        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 419            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 420        time_offset += ltemp;
 421        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
 422    } else {
 423        ltemp = time_offset;
 424        if (!(time_status & STA_FLL))
 425            ltemp >>= SHIFT_KG + time_constant;
 426        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 427            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 428        time_offset -= ltemp;
 429        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
 430    }
 431
 432    /*
 433     * Compute the frequency estimate and additional phase
 434     * adjustment due to frequency error for the next
 435     * second. When the PPS signal is engaged, gnaw on the
 436     * watchdog counter and update the frequency computed by
 437     * the pll and the PPS signal.
 438     */
 439    pps_valid++;
 440    if (pps_valid == PPS_VALID) {       /* PPS signal lost */
 441        pps_jitter = MAXTIME;
 442        pps_stabil = MAXFREQ;
 443        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 444                         STA_PPSWANDER | STA_PPSERROR);
 445    }
 446    ltemp = time_freq + pps_freq;
 447    if (ltemp < 0)
 448        time_adj -= -ltemp >>
 449            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 450    else
 451        time_adj += ltemp >>
 452            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 453
 454#if HZ == 100
 455    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
 456     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
 457     */
 458    if (time_adj < 0)
 459        time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
 460    else
 461        time_adj += (time_adj >> 2) + (time_adj >> 5);
 462#endif
 463}
 464
 465/* in the NTP reference this is called "hardclock()" */
 466static void update_wall_time_one_tick(void)
 467{
 468        long time_adjust_step;
 469
 470        if ( (time_adjust_step = time_adjust) != 0 ) {
 471            /* We are doing an adjtime thing. 
 472             *
 473             * Prepare time_adjust_step to be within bounds.
 474             * Note that a positive time_adjust means we want the clock
 475             * to run faster.
 476             *
 477             * Limit the amount of the step to be in the range
 478             * -tickadj .. +tickadj
 479             */
 480             if (time_adjust > tickadj)
 481                time_adjust_step = tickadj;
 482             else if (time_adjust < -tickadj)
 483                time_adjust_step = -tickadj;
 484             
 485            /* Reduce by this step the amount of time left  */
 486            time_adjust -= time_adjust_step;
 487        }
 488        xtime.tv_nsec += tick_nsec + time_adjust_step * 1000;
 489        /*
 490         * Advance the phase, once it gets to one microsecond, then
 491         * advance the tick more.
 492         */
 493        time_phase += time_adj;
 494        if (time_phase <= -FINEUSEC) {
 495                long ltemp = -time_phase >> (SHIFT_SCALE - 10);
 496                time_phase += ltemp << (SHIFT_SCALE - 10);
 497                xtime.tv_nsec -= ltemp;
 498        }
 499        else if (time_phase >= FINEUSEC) {
 500                long ltemp = time_phase >> (SHIFT_SCALE - 10);
 501                time_phase -= ltemp << (SHIFT_SCALE - 10);
 502                xtime.tv_nsec += ltemp;
 503        }
 504}
 505
 506/*
 507 * Using a loop looks inefficient, but "ticks" is
 508 * usually just one (we shouldn't be losing ticks,
 509 * we're doing this this way mainly for interrupt
 510 * latency reasons, not because we think we'll
 511 * have lots of lost timer ticks
 512 */
 513static void update_wall_time(unsigned long ticks)
 514{
 515        do {
 516                ticks--;
 517                update_wall_time_one_tick();
 518        } while (ticks);
 519
 520        if (xtime.tv_nsec >= 1000000000) {
 521            xtime.tv_nsec -= 1000000000;
 522            xtime.tv_sec++;
 523            second_overflow();
 524        }
 525}
 526
 527static inline void do_process_times(struct task_struct *p,
 528        unsigned long user, unsigned long system)
 529{
 530        unsigned long psecs;
 531
 532        psecs = (p->utime += user);
 533        psecs += (p->stime += system);
 534        if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
 535                /* Send SIGXCPU every second.. */
 536                if (!(psecs % HZ))
 537                        send_sig(SIGXCPU, p, 1);
 538                /* and SIGKILL when we go over max.. */
 539                if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
 540                        send_sig(SIGKILL, p, 1);
 541        }
 542}
 543
 544static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
 545{
 546        unsigned long it_virt = p->it_virt_value;
 547
 548        if (it_virt) {
 549                it_virt -= ticks;
 550                if (!it_virt) {
 551                        it_virt = p->it_virt_incr;
 552                        send_sig(SIGVTALRM, p, 1);
 553                }
 554                p->it_virt_value = it_virt;
 555        }
 556}
 557
 558static inline void do_it_prof(struct task_struct *p)
 559{
 560        unsigned long it_prof = p->it_prof_value;
 561
 562        if (it_prof) {
 563                if (--it_prof == 0) {
 564                        it_prof = p->it_prof_incr;
 565                        send_sig(SIGPROF, p, 1);
 566                }
 567                p->it_prof_value = it_prof;
 568        }
 569}
 570
 571void update_one_process(struct task_struct *p, unsigned long user,
 572                        unsigned long system, int cpu)
 573{
 574        p->per_cpu_utime[cpu] += user;
 575        p->per_cpu_stime[cpu] += system;
 576        do_process_times(p, user, system);
 577        do_it_virt(p, user);
 578        do_it_prof(p);
 579}       
 580
 581/*
 582 * Called from the timer interrupt handler to charge one tick to the current 
 583 * process.  user_tick is 1 if the tick is user time, 0 for system.
 584 */
 585void update_process_times(int user_tick)
 586{
 587        struct task_struct *p = current;
 588        int cpu = smp_processor_id(), system = user_tick ^ 1;
 589
 590        update_one_process(p, user_tick, system, cpu);
 591        scheduler_tick(user_tick, system);
 592}
 593
 594/*
 595 * Nr of active tasks - counted in fixed-point numbers
 596 */
 597static unsigned long count_active_tasks(void)
 598{
 599        return (nr_running() + nr_uninterruptible()) * FIXED_1;
 600}
 601
 602/*
 603 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 604 * imply that avenrun[] is the standard name for this kind of thing.
 605 * Nothing else seems to be standardized: the fractional size etc
 606 * all seem to differ on different machines.
 607 *
 608 * Requires xtime_lock to access.
 609 */
 610unsigned long avenrun[3];
 611
 612/*
 613 * calc_load - given tick count, update the avenrun load estimates.
 614 * This is called while holding a write_lock on xtime_lock.
 615 */
 616static inline void calc_load(unsigned long ticks)
 617{
 618        unsigned long active_tasks; /* fixed-point */
 619        static int count = LOAD_FREQ;
 620
 621        count -= ticks;
 622        if (count < 0) {
 623                count += LOAD_FREQ;
 624                active_tasks = count_active_tasks();
 625                CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 626                CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 627                CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 628        }
 629}
 630
 631/* jiffies at the most recent update of wall time */
 632unsigned long wall_jiffies;
 633
 634/*
 635 * This read-write spinlock protects us from races in SMP while
 636 * playing with xtime and avenrun.
 637 */
 638rwlock_t xtime_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
 639unsigned long last_time_offset;
 640
 641static inline void update_times(void)
 642{
 643        unsigned long ticks;
 644
 645        /*
 646         * update_times() is run from the raw timer_bh handler so we
 647         * just know that the irqs are locally enabled and so we don't
 648         * need to save/restore the flags of the local CPU here. -arca
 649         */
 650        write_lock_irq(&xtime_lock);
 651
 652        ticks = jiffies - wall_jiffies;
 653        if (ticks) {
 654                wall_jiffies += ticks;
 655                update_wall_time(ticks);
 656        }
 657        last_time_offset = 0;
 658        calc_load(ticks);
 659        write_unlock_irq(&xtime_lock);
 660}
 661
 662void timer_bh(void)
 663{
 664        update_times();
 665        run_timer_list();
 666}
 667
 668void do_timer(struct pt_regs *regs)
 669{
 670        jiffies_64++;
 671#ifndef CONFIG_SMP
 672        /* SMP process accounting uses the local APIC timer */
 673
 674        update_process_times(user_mode(regs));
 675#endif
 676        mark_bh(TIMER_BH);
 677        if (TQ_ACTIVE(tq_timer))
 678                mark_bh(TQUEUE_BH);
 679}
 680
 681#if !defined(__alpha__) && !defined(__ia64__)
 682
 683/*
 684 * For backwards compatibility?  This can be done in libc so Alpha
 685 * and all newer ports shouldn't need it.
 686 */
 687asmlinkage unsigned long sys_alarm(unsigned int seconds)
 688{
 689        struct itimerval it_new, it_old;
 690        unsigned int oldalarm;
 691
 692        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 693        it_new.it_value.tv_sec = seconds;
 694        it_new.it_value.tv_usec = 0;
 695        do_setitimer(ITIMER_REAL, &it_new, &it_old);
 696        oldalarm = it_old.it_value.tv_sec;
 697        /* ehhh.. We can't return 0 if we have an alarm pending.. */
 698        /* And we'd better return too much than too little anyway */
 699        if (it_old.it_value.tv_usec)
 700                oldalarm++;
 701        return oldalarm;
 702}
 703
 704#endif
 705
 706#ifndef __alpha__
 707
 708/*
 709 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 710 * should be moved into arch/i386 instead?
 711 */
 712 
 713asmlinkage long sys_getpid(void)
 714{
 715        /* This is SMP safe - current->pid doesn't change */
 716        return current->tgid;
 717}
 718
 719/*
 720 * This is not strictly SMP safe: p_opptr could change
 721 * from under us. However, rather than getting any lock
 722 * we can use an optimistic algorithm: get the parent
 723 * pid, and go back and check that the parent is still
 724 * the same. If it has changed (which is extremely unlikely
 725 * indeed), we just try again..
 726 *
 727 * NOTE! This depends on the fact that even if we _do_
 728 * get an old value of "parent", we can happily dereference
 729 * the pointer: we just can't necessarily trust the result
 730 * until we know that the parent pointer is valid.
 731 *
 732 * The "mb()" macro is a memory barrier - a synchronizing
 733 * event. It also makes sure that gcc doesn't optimize
 734 * away the necessary memory references.. The barrier doesn't
 735 * have to have all that strong semantics: on x86 we don't
 736 * really require a synchronizing instruction, for example.
 737 * The barrier is more important for code generation than
 738 * for any real memory ordering semantics (even if there is
 739 * a small window for a race, using the old pointer is
 740 * harmless for a while).
 741 */
 742asmlinkage long sys_getppid(void)
 743{
 744        int pid;
 745        struct task_struct * me = current;
 746        struct task_struct * parent;
 747
 748        parent = me->real_parent;
 749        for (;;) {
 750                pid = parent->pid;
 751#if CONFIG_SMP
 752{
 753                struct task_struct *old = parent;
 754                mb();
 755                parent = me->real_parent;
 756                if (old != parent)
 757                        continue;
 758}
 759#endif
 760                break;
 761        }
 762        return pid;
 763}
 764
 765asmlinkage long sys_getuid(void)
 766{
 767        /* Only we change this so SMP safe */
 768        return current->uid;
 769}
 770
 771asmlinkage long sys_geteuid(void)
 772{
 773        /* Only we change this so SMP safe */
 774        return current->euid;
 775}
 776
 777asmlinkage long sys_getgid(void)
 778{
 779        /* Only we change this so SMP safe */
 780        return current->gid;
 781}
 782
 783asmlinkage long sys_getegid(void)
 784{
 785        /* Only we change this so SMP safe */
 786        return  current->egid;
 787}
 788
 789#endif
 790
 791static void process_timeout(unsigned long __data)
 792{
 793        wake_up_process((task_t *)__data);
 794}
 795
 796/**
 797 * schedule_timeout - sleep until timeout
 798 * @timeout: timeout value in jiffies
 799 *
 800 * Make the current task sleep until @timeout jiffies have
 801 * elapsed. The routine will return immediately unless
 802 * the current task state has been set (see set_current_state()).
 803 *
 804 * You can set the task state as follows -
 805 *
 806 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 807 * pass before the routine returns. The routine will return 0
 808 *
 809 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 810 * delivered to the current task. In this case the remaining time
 811 * in jiffies will be returned, or 0 if the timer expired in time
 812 *
 813 * The current task state is guaranteed to be TASK_RUNNING when this
 814 * routine returns.
 815 *
 816 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 817 * the CPU away without a bound on the timeout. In this case the return
 818 * value will be %MAX_SCHEDULE_TIMEOUT.
 819 *
 820 * In all cases the return value is guaranteed to be non-negative.
 821 */
 822signed long schedule_timeout(signed long timeout)
 823{
 824        struct timer_list timer;
 825        unsigned long expire;
 826
 827        switch (timeout)
 828        {
 829        case MAX_SCHEDULE_TIMEOUT:
 830                /*
 831                 * These two special cases are useful to be comfortable
 832                 * in the caller. Nothing more. We could take
 833                 * MAX_SCHEDULE_TIMEOUT from one of the negative value
 834                 * but I' d like to return a valid offset (>=0) to allow
 835                 * the caller to do everything it want with the retval.
 836                 */
 837                schedule();
 838                goto out;
 839        default:
 840                /*
 841                 * Another bit of PARANOID. Note that the retval will be
 842                 * 0 since no piece of kernel is supposed to do a check
 843                 * for a negative retval of schedule_timeout() (since it
 844                 * should never happens anyway). You just have the printk()
 845                 * that will tell you if something is gone wrong and where.
 846                 */
 847                if (timeout < 0)
 848                {
 849                        printk(KERN_ERR "schedule_timeout: wrong timeout "
 850                               "value %lx from %p\n", timeout,
 851                               __builtin_return_address(0));
 852                        current->state = TASK_RUNNING;
 853                        goto out;
 854                }
 855        }
 856
 857        expire = timeout + jiffies;
 858
 859        init_timer(&timer);
 860        timer.expires = expire;
 861        timer.data = (unsigned long) current;
 862        timer.function = process_timeout;
 863
 864        add_timer(&timer);
 865        schedule();
 866        del_timer_sync(&timer);
 867
 868        timeout = expire - jiffies;
 869
 870 out:
 871        return timeout < 0 ? 0 : timeout;
 872}
 873
 874/* Thread ID - the internal kernel "pid" */
 875asmlinkage long sys_gettid(void)
 876{
 877        return current->pid;
 878}
 879
 880asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
 881{
 882        struct timespec t;
 883        unsigned long expire;
 884
 885        if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
 886                return -EFAULT;
 887
 888        if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
 889                return -EINVAL;
 890
 891
 892        if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
 893            current->policy != SCHED_NORMAL)
 894        {
 895                /*
 896                 * Short delay requests up to 2 ms will be handled with
 897                 * high precision by a busy wait for all real-time processes.
 898                 *
 899                 * Its important on SMP not to do this holding locks.
 900                 */
 901                udelay((t.tv_nsec + 999) / 1000);
 902                return 0;
 903        }
 904
 905        expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
 906
 907        current->state = TASK_INTERRUPTIBLE;
 908        expire = schedule_timeout(expire);
 909
 910        if (expire) {
 911                if (rmtp) {
 912                        jiffies_to_timespec(expire, &t);
 913                        if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
 914                                return -EFAULT;
 915                }
 916                return -EINTR;
 917        }
 918        return 0;
 919}
 920
 921/*
 922 * sys_sysinfo - fill in sysinfo struct
 923 */ 
 924asmlinkage long sys_sysinfo(struct sysinfo *info)
 925{
 926        struct sysinfo val;
 927        unsigned long mem_total, sav_total;
 928        unsigned int mem_unit, bitcount;
 929
 930        memset((char *)&val, 0, sizeof(struct sysinfo));
 931
 932        read_lock_irq(&xtime_lock);
 933        val.uptime = jiffies / HZ;
 934
 935        val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
 936        val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
 937        val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 938
 939        val.procs = nr_threads;
 940        read_unlock_irq(&xtime_lock);
 941
 942        si_meminfo(&val);
 943        si_swapinfo(&val);
 944
 945        /*
 946         * If the sum of all the available memory (i.e. ram + swap)
 947         * is less than can be stored in a 32 bit unsigned long then
 948         * we can be binary compatible with 2.2.x kernels.  If not,
 949         * well, in that case 2.2.x was broken anyways...
 950         *
 951         *  -Erik Andersen <andersee@debian.org>
 952         */
 953
 954        mem_total = val.totalram + val.totalswap;
 955        if (mem_total < val.totalram || mem_total < val.totalswap)
 956                goto out;
 957        bitcount = 0;
 958        mem_unit = val.mem_unit;
 959        while (mem_unit > 1) {
 960                bitcount++;
 961                mem_unit >>= 1;
 962                sav_total = mem_total;
 963                mem_total <<= 1;
 964                if (mem_total < sav_total)
 965                        goto out;
 966        }
 967
 968        /*
 969         * If mem_total did not overflow, multiply all memory values by
 970         * val.mem_unit and set it to 1.  This leaves things compatible
 971         * with 2.2.x, and also retains compatibility with earlier 2.4.x
 972         * kernels...
 973         */
 974
 975        val.mem_unit = 1;
 976        val.totalram <<= bitcount;
 977        val.freeram <<= bitcount;
 978        val.sharedram <<= bitcount;
 979        val.bufferram <<= bitcount;
 980        val.totalswap <<= bitcount;
 981        val.freeswap <<= bitcount;
 982        val.totalhigh <<= bitcount;
 983        val.freehigh <<= bitcount;
 984
 985out:
 986        if (copy_to_user(info, &val, sizeof(struct sysinfo)))
 987                return -EFAULT;
 988
 989        return 0;
 990}
 991
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.