1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/mm.h>
20#include <linux/nmi.h>
21#include <linux/init.h>
22#include <asm/uaccess.h>
23#include <linux/highmem.h>
24#include <linux/smp_lock.h>
25#include <asm/mmu_context.h>
26#include <linux/interrupt.h>
27#include <linux/completion.h>
28#include <linux/kernel_stat.h>
29#include <linux/security.h>
30#include <linux/notifier.h>
31#include <linux/delay.h>
32
33
34
35
36
37
38#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
39#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
40#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
41
42
43
44
45
46
47#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
48#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
49#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
50
51
52
53
54
55
56
57
58#define MIN_TIMESLICE ( 10 * HZ / 1000)
59#define MAX_TIMESLICE (300 * HZ / 1000)
60#define CHILD_PENALTY 95
61#define PARENT_PENALTY 100
62#define EXIT_WEIGHT 3
63#define PRIO_BONUS_RATIO 25
64#define INTERACTIVE_DELTA 2
65#define MAX_SLEEP_AVG (2*HZ)
66#define STARVATION_LIMIT (2*HZ)
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96#define SCALE(v1,v1_max,v2_max) \
97 (v1) * (v2_max) / (v1_max)
98
99#define DELTA(p) \
100 (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
101 INTERACTIVE_DELTA)
102
103#define TASK_INTERACTIVE(p) \
104 ((p)->prio <= (p)->static_prio - DELTA(p))
105
106
107
108
109
110
111
112
113
114
115
116
117#define BASE_TIMESLICE(p) (MIN_TIMESLICE + \
118 ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/(MAX_USER_PRIO - 1)))
119
120static inline unsigned int task_timeslice(task_t *p)
121{
122 return BASE_TIMESLICE(p);
123}
124
125
126
127
128
129#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
130
131typedef struct runqueue runqueue_t;
132
133struct prio_array {
134 int nr_active;
135 unsigned long bitmap[BITMAP_SIZE];
136 struct list_head queue[MAX_PRIO];
137};
138
139
140
141
142
143
144
145
146struct runqueue {
147 spinlock_t lock;
148 unsigned long nr_running, nr_switches, expired_timestamp,
149 nr_uninterruptible;
150 task_t *curr, *idle;
151 prio_array_t *active, *expired, arrays[2];
152 int prev_nr_running[NR_CPUS];
153
154 task_t *migration_thread;
155 struct list_head migration_queue;
156
157} ____cacheline_aligned;
158
159static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
160
161#define cpu_rq(cpu) (runqueues + (cpu))
162#define this_rq() cpu_rq(smp_processor_id())
163#define task_rq(p) cpu_rq(task_cpu(p))
164#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
165#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
166
167
168
169
170#ifndef prepare_arch_switch
171# define prepare_arch_switch(rq, next) do { } while(0)
172# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
173# define task_running(rq, p) ((rq)->curr == (p))
174#endif
175
176
177
178
179
180
181static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
182{
183 struct runqueue *rq;
184
185repeat_lock_task:
186 local_irq_save(*flags);
187 rq = task_rq(p);
188 spin_lock(&rq->lock);
189 if (unlikely(rq != task_rq(p))) {
190 spin_unlock_irqrestore(&rq->lock, *flags);
191 goto repeat_lock_task;
192 }
193 return rq;
194}
195
196static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
197{
198 spin_unlock_irqrestore(&rq->lock, *flags);
199}
200
201
202
203
204static inline runqueue_t *this_rq_lock(void)
205{
206 runqueue_t *rq;
207
208 local_irq_disable();
209 rq = this_rq();
210 spin_lock(&rq->lock);
211
212 return rq;
213}
214
215static inline void rq_unlock(runqueue_t *rq)
216{
217 spin_unlock_irq(&rq->lock);
218}
219
220
221
222
223static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
224{
225 array->nr_active--;
226 list_del(&p->run_list);
227 if (list_empty(array->queue + p->prio))
228 __clear_bit(p->prio, array->bitmap);
229}
230
231static inline void enqueue_task(struct task_struct *p, prio_array_t *array)
232{
233 list_add_tail(&p->run_list, array->queue + p->prio);
234 __set_bit(p->prio, array->bitmap);
235 array->nr_active++;
236 p->array = array;
237}
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253static inline int effective_prio(task_t *p)
254{
255 int bonus, prio;
256
257 bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
258 MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
259
260 prio = p->static_prio - bonus;
261 if (prio < MAX_RT_PRIO)
262 prio = MAX_RT_PRIO;
263 if (prio > MAX_PRIO-1)
264 prio = MAX_PRIO-1;
265 return prio;
266}
267
268
269
270
271
272
273
274static inline void activate_task(task_t *p, runqueue_t *rq)
275{
276 unsigned long sleep_time = jiffies - p->sleep_timestamp;
277 prio_array_t *array = rq->active;
278
279 if (!rt_task(p) && sleep_time) {
280
281
282
283
284
285
286
287 p->sleep_avg += sleep_time;
288 if (p->sleep_avg > MAX_SLEEP_AVG)
289 p->sleep_avg = MAX_SLEEP_AVG;
290 p->prio = effective_prio(p);
291 }
292 enqueue_task(p, array);
293 rq->nr_running++;
294}
295
296
297
298
299static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
300{
301 rq->nr_running--;
302 if (p->state == TASK_UNINTERRUPTIBLE)
303 rq->nr_uninterruptible++;
304 dequeue_task(p, p->array);
305 p->array = NULL;
306}
307
308
309
310
311
312
313
314
315static inline void resched_task(task_t *p)
316{
317#ifdef CONFIG_SMP
318 int need_resched, nrpolling;
319
320 preempt_disable();
321
322 nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
323 need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
324 nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
325
326 if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
327 smp_send_reschedule(task_cpu(p));
328 preempt_enable();
329#else
330 set_tsk_need_resched(p);
331#endif
332}
333
334#ifdef CONFIG_SMP
335
336
337
338
339
340
341
342void wait_task_inactive(task_t * p)
343{
344 unsigned long flags;
345 runqueue_t *rq;
346
347repeat:
348 preempt_disable();
349 rq = task_rq(p);
350 if (unlikely(task_running(rq, p))) {
351 cpu_relax();
352
353
354
355
356
357 preempt_enable();
358 goto repeat;
359 }
360 rq = task_rq_lock(p, &flags);
361 if (unlikely(task_running(rq, p))) {
362 task_rq_unlock(rq, &flags);
363 preempt_enable();
364 goto repeat;
365 }
366 task_rq_unlock(rq, &flags);
367 preempt_enable();
368}
369#endif
370
371
372
373
374
375
376
377
378
379
380
381void kick_if_running(task_t * p)
382{
383 if ((task_running(task_rq(p), p)) && (task_cpu(p) != smp_processor_id()))
384 resched_task(p);
385}
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400static int try_to_wake_up(task_t * p, int sync)
401{
402 unsigned long flags;
403 int success = 0;
404 long old_state;
405 runqueue_t *rq;
406
407repeat_lock_task:
408 rq = task_rq_lock(p, &flags);
409 old_state = p->state;
410 if (!p->array) {
411
412
413
414
415 if (unlikely(sync && !task_running(rq, p) &&
416 (task_cpu(p) != smp_processor_id()) &&
417 (p->cpus_allowed & (1UL << smp_processor_id())))) {
418
419 set_task_cpu(p, smp_processor_id());
420 task_rq_unlock(rq, &flags);
421 goto repeat_lock_task;
422 }
423 if (old_state == TASK_UNINTERRUPTIBLE)
424 rq->nr_uninterruptible--;
425 activate_task(p, rq);
426
427 if (p->prio < rq->curr->prio)
428 resched_task(rq->curr);
429 success = 1;
430 }
431 p->state = TASK_RUNNING;
432 task_rq_unlock(rq, &flags);
433
434 return success;
435}
436
437int wake_up_process(task_t * p)
438{
439 return try_to_wake_up(p, 0);
440}
441
442
443
444
445
446
447
448void wake_up_forked_process(task_t * p)
449{
450 runqueue_t *rq = this_rq_lock();
451
452 p->state = TASK_RUNNING;
453 if (!rt_task(p)) {
454
455
456
457
458
459 current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
460 p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
461 p->prio = effective_prio(p);
462 }
463 set_task_cpu(p, smp_processor_id());
464 activate_task(p, rq);
465
466 rq_unlock(rq);
467}
468
469
470
471
472
473
474
475
476
477
478void sched_exit(task_t * p)
479{
480 local_irq_disable();
481 if (p->first_time_slice) {
482 p->parent->time_slice += p->time_slice;
483 if (unlikely(p->parent->time_slice > MAX_TIMESLICE))
484 p->parent->time_slice = MAX_TIMESLICE;
485 }
486 local_irq_enable();
487
488
489
490
491 if (p->sleep_avg < p->parent->sleep_avg)
492 p->parent->sleep_avg = (p->parent->sleep_avg * EXIT_WEIGHT +
493 p->sleep_avg) / (EXIT_WEIGHT + 1);
494}
495
496
497
498
499
500#if CONFIG_SMP || CONFIG_PREEMPT
501asmlinkage void schedule_tail(task_t *prev)
502{
503 finish_arch_switch(this_rq(), prev);
504}
505#endif
506
507
508
509
510
511static inline task_t * context_switch(task_t *prev, task_t *next)
512{
513 struct mm_struct *mm = next->mm;
514 struct mm_struct *oldmm = prev->active_mm;
515
516 if (unlikely(!mm)) {
517 next->active_mm = oldmm;
518 atomic_inc(&oldmm->mm_count);
519 enter_lazy_tlb(oldmm, next, smp_processor_id());
520 } else
521 switch_mm(oldmm, mm, next, smp_processor_id());
522
523 if (unlikely(!prev->mm)) {
524 prev->active_mm = NULL;
525 mmdrop(oldmm);
526 }
527
528
529 switch_to(prev, next, prev);
530
531 return prev;
532}
533
534
535
536
537
538
539
540
541unsigned long nr_running(void)
542{
543 unsigned long i, sum = 0;
544
545 for (i = 0; i < NR_CPUS; i++)
546 sum += cpu_rq(i)->nr_running;
547
548 return sum;
549}
550
551unsigned long nr_uninterruptible(void)
552{
553 unsigned long i, sum = 0;
554
555 for (i = 0; i < NR_CPUS; i++)
556 sum += cpu_rq(i)->nr_uninterruptible;
557
558 return sum;
559}
560
561unsigned long nr_context_switches(void)
562{
563 unsigned long i, sum = 0;
564
565 for (i = 0; i < NR_CPUS; i++)
566 sum += cpu_rq(i)->nr_switches;
567
568 return sum;
569}
570
571
572
573
574
575
576
577static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
578{
579 if (rq1 == rq2)
580 spin_lock(&rq1->lock);
581 else {
582 if (rq1 < rq2) {
583 spin_lock(&rq1->lock);
584 spin_lock(&rq2->lock);
585 } else {
586 spin_lock(&rq2->lock);
587 spin_lock(&rq1->lock);
588 }
589 }
590}
591
592
593
594
595
596
597
598static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
599{
600 spin_unlock(&rq1->lock);
601 if (rq1 != rq2)
602 spin_unlock(&rq2->lock);
603}
604
605#if CONFIG_SMP
606
607
608
609
610
611
612
613static inline unsigned int double_lock_balance(runqueue_t *this_rq,
614 runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
615{
616 if (unlikely(!spin_trylock(&busiest->lock))) {
617 if (busiest < this_rq) {
618 spin_unlock(&this_rq->lock);
619 spin_lock(&busiest->lock);
620 spin_lock(&this_rq->lock);
621
622 if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
623 nr_running = this_rq->nr_running;
624 else
625 nr_running = this_rq->prev_nr_running[this_cpu];
626 } else
627 spin_lock(&busiest->lock);
628 }
629 return nr_running;
630}
631
632
633
634
635static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu, int idle, int *imbalance)
636{
637 int nr_running, load, max_load, i;
638 runqueue_t *busiest, *rq_src;
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662 if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
663 nr_running = this_rq->nr_running;
664 else
665 nr_running = this_rq->prev_nr_running[this_cpu];
666
667 busiest = NULL;
668 max_load = 1;
669 for (i = 0; i < NR_CPUS; i++) {
670 if (!cpu_online(i))
671 continue;
672
673 rq_src = cpu_rq(i);
674 if (idle || (rq_src->nr_running < this_rq->prev_nr_running[i]))
675 load = rq_src->nr_running;
676 else
677 load = this_rq->prev_nr_running[i];
678 this_rq->prev_nr_running[i] = rq_src->nr_running;
679
680 if ((load > max_load) && (rq_src != this_rq)) {
681 busiest = rq_src;
682 max_load = load;
683 }
684 }
685
686 if (likely(!busiest))
687 goto out;
688
689 *imbalance = (max_load - nr_running) / 2;
690
691
692 if (!idle && (*imbalance < (max_load + 3)/4)) {
693 busiest = NULL;
694 goto out;
695 }
696
697 nr_running = double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running);
698
699
700
701
702 if (busiest->nr_running <= nr_running + 1) {
703 spin_unlock(&busiest->lock);
704 busiest = NULL;
705 }
706out:
707 return busiest;
708}
709
710
711
712
713
714static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
715{
716 dequeue_task(p, src_array);
717 src_rq->nr_running--;
718 set_task_cpu(p, this_cpu);
719 this_rq->nr_running++;
720 enqueue_task(p, this_rq->active);
721
722
723
724
725 if (p->prio < this_rq->curr->prio)
726 set_need_resched();
727}
728
729
730
731
732
733
734
735
736
737static void load_balance(runqueue_t *this_rq, int idle)
738{
739 int imbalance, idx, this_cpu = smp_processor_id();
740 runqueue_t *busiest;
741 prio_array_t *array;
742 struct list_head *head, *curr;
743 task_t *tmp;
744
745 busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance);
746 if (!busiest)
747 goto out;
748
749
750
751
752
753
754
755 if (busiest->expired->nr_active)
756 array = busiest->expired;
757 else
758 array = busiest->active;
759
760new_array:
761
762 idx = 0;
763skip_bitmap:
764 if (!idx)
765 idx = sched_find_first_bit(array->bitmap);
766 else
767 idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
768 if (idx == MAX_PRIO) {
769 if (array == busiest->expired) {
770 array = busiest->active;
771 goto new_array;
772 }
773 goto out_unlock;
774 }
775
776 head = array->queue + idx;
777 curr = head->prev;
778skip_queue:
779 tmp = list_entry(curr, task_t, run_list);
780
781
782
783
784
785
786
787
788#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
789 ((jiffies - (p)->sleep_timestamp > cache_decay_ticks) && \
790 !task_running(rq, p) && \
791 ((p)->cpus_allowed & (1UL << (this_cpu))))
792
793 curr = curr->prev;
794
795 if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
796 if (curr != head)
797 goto skip_queue;
798 idx++;
799 goto skip_bitmap;
800 }
801 pull_task(busiest, array, tmp, this_rq, this_cpu);
802 if (!idle && --imbalance) {
803 if (curr != head)
804 goto skip_queue;
805 idx++;
806 goto skip_bitmap;
807 }
808out_unlock:
809 spin_unlock(&busiest->lock);
810out:
811 ;
812}
813
814
815
816
817
818
819
820
821
822
823#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
824#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
825
826static inline void idle_tick(runqueue_t *rq)
827{
828 if (jiffies % IDLE_REBALANCE_TICK)
829 return;
830 spin_lock(&rq->lock);
831 load_balance(rq, 1);
832 spin_unlock(&rq->lock);
833}
834
835#endif
836
837
838
839
840
841
842
843
844
845
846#define EXPIRED_STARVING(rq) \
847 ((rq)->expired_timestamp && \
848 (jiffies - (rq)->expired_timestamp >= \
849 STARVATION_LIMIT * ((rq)->nr_running) + 1))
850
851
852
853
854
855void scheduler_tick(int user_ticks, int sys_ticks)
856{
857 int cpu = smp_processor_id();
858 runqueue_t *rq = this_rq();
859 task_t *p = current;
860
861 if (p == rq->idle) {
862
863 if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
864 kstat.per_cpu_system[cpu] += sys_ticks;
865#if CONFIG_SMP
866 idle_tick(rq);
867#endif
868 return;
869 }
870 if (TASK_NICE(p) > 0)
871 kstat.per_cpu_nice[cpu] += user_ticks;
872 else
873 kstat.per_cpu_user[cpu] += user_ticks;
874 kstat.per_cpu_system[cpu] += sys_ticks;
875
876
877 if (p->array != rq->active) {
878 set_tsk_need_resched(p);
879 return;
880 }
881 spin_lock(&rq->lock);
882 if (unlikely(rt_task(p))) {
883
884
885
886
887 if ((p->policy == SCHED_RR) && !--p->time_slice) {
888 p->time_slice = task_timeslice(p);
889 p->first_time_slice = 0;
890 set_tsk_need_resched(p);
891
892
893 dequeue_task(p, rq->active);
894 enqueue_task(p, rq->active);
895 }
896 goto out;
897 }
898
899
900
901
902
903
904
905
906 if (p->sleep_avg)
907 p->sleep_avg--;
908 if (!--p->time_slice) {
909 dequeue_task(p, rq->active);
910 set_tsk_need_resched(p);
911 p->prio = effective_prio(p);
912 p->time_slice = task_timeslice(p);
913 p->first_time_slice = 0;
914
915 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
916 if (!rq->expired_timestamp)
917 rq->expired_timestamp = jiffies;
918 enqueue_task(p, rq->expired);
919 } else
920 enqueue_task(p, rq->active);
921 }
922out:
923#if CONFIG_SMP
924 if (!(jiffies % BUSY_REBALANCE_TICK))
925 load_balance(rq, 0);
926#endif
927 spin_unlock(&rq->lock);
928}
929
930void scheduling_functions_start_here(void) { }
931
932
933
934
935asmlinkage void schedule(void)
936{
937 task_t *prev, *next;
938 runqueue_t *rq;
939 prio_array_t *array;
940 struct list_head *queue;
941 int idx;
942
943
944
945
946
947
948 if (likely(current->state != TASK_ZOMBIE)) {
949 if (unlikely(in_atomic())) {
950 printk(KERN_ERR "bad: scheduling while atomic!\n");
951 dump_stack();
952 }
953 }
954
955#if CONFIG_DEBUG_HIGHMEM
956 check_highmem_ptes();
957#endif
958need_resched:
959 preempt_disable();
960 prev = current;
961 rq = this_rq();
962
963 release_kernel_lock(prev);
964 prev->sleep_timestamp = jiffies;
965 spin_lock_irq(&rq->lock);
966
967
968
969
970
971 if (unlikely(preempt_count() & PREEMPT_ACTIVE))
972 goto pick_next_task;
973
974 switch (prev->state) {
975 case TASK_INTERRUPTIBLE:
976 if (unlikely(signal_pending(prev))) {
977 prev->state = TASK_RUNNING;
978 break;
979 }
980 default:
981 deactivate_task(prev, rq);
982 case TASK_RUNNING:
983 ;
984 }
985pick_next_task:
986 if (unlikely(!rq->nr_running)) {
987#if CONFIG_SMP
988 load_balance(rq, 1);
989 if (rq->nr_running)
990 goto pick_next_task;
991#endif
992 next = rq->idle;
993 rq->expired_timestamp = 0;
994 goto switch_tasks;
995 }
996
997 array = rq->active;
998 if (unlikely(!array->nr_active)) {
999
1000
1001
1002 rq->active = rq->expired;
1003 rq->expired = array;
1004 array = rq->active;
1005 rq->expired_timestamp = 0;
1006 }
1007
1008 idx = sched_find_first_bit(array->bitmap);
1009 queue = array->queue + idx;
1010 next = list_entry(queue->next, task_t, run_list);
1011
1012switch_tasks:
1013 prefetch(next);
1014 clear_tsk_need_resched(prev);
1015
1016 if (likely(prev != next)) {
1017 rq->nr_switches++;
1018 rq->curr = next;
1019
1020 prepare_arch_switch(rq, next);
1021 prev = context_switch(prev, next);
1022 barrier();
1023 rq = this_rq();
1024 finish_arch_switch(rq, prev);
1025 } else
1026 spin_unlock_irq(&rq->lock);
1027
1028 reacquire_kernel_lock(current);
1029 preempt_enable_no_resched();
1030 if (test_thread_flag(TIF_NEED_RESCHED))
1031 goto need_resched;
1032}
1033
1034#ifdef CONFIG_PREEMPT
1035
1036
1037
1038
1039
1040asmlinkage void preempt_schedule(void)
1041{
1042 struct thread_info *ti = current_thread_info();
1043
1044
1045
1046
1047
1048 if (unlikely(ti->preempt_count || irqs_disabled()))
1049 return;
1050
1051need_resched:
1052 ti->preempt_count = PREEMPT_ACTIVE;
1053 schedule();
1054 ti->preempt_count = 0;
1055
1056
1057 barrier();
1058 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
1059 goto need_resched;
1060}
1061#endif
1062
1063int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
1064{
1065 task_t *p = curr->task;
1066 return ((p->state & mode) && try_to_wake_up(p, sync));
1067}
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync)
1079{
1080 struct list_head *tmp, *next;
1081
1082 list_for_each_safe(tmp, next, &q->task_list) {
1083 wait_queue_t *curr;
1084 unsigned flags;
1085 curr = list_entry(tmp, wait_queue_t, task_list);
1086 flags = curr->flags;
1087 if (curr->func(curr, mode, sync) &&
1088 (flags & WQ_FLAG_EXCLUSIVE) &&
1089 !--nr_exclusive)
1090 break;
1091 }
1092}
1093
1094
1095
1096
1097
1098
1099
1100void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
1101{
1102 unsigned long flags;
1103
1104 if (unlikely(!q))
1105 return;
1106
1107 spin_lock_irqsave(&q->lock, flags);
1108 __wake_up_common(q, mode, nr_exclusive, 0);
1109 spin_unlock_irqrestore(&q->lock, flags);
1110}
1111
1112
1113
1114
1115void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
1116{
1117 __wake_up_common(q, mode, 1, 0);
1118}
1119
1120#if CONFIG_SMP
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
1134{
1135 unsigned long flags;
1136
1137 if (unlikely(!q))
1138 return;
1139
1140 spin_lock_irqsave(&q->lock, flags);
1141 if (likely(nr_exclusive))
1142 __wake_up_common(q, mode, nr_exclusive, 1);
1143 else
1144 __wake_up_common(q, mode, nr_exclusive, 0);
1145 spin_unlock_irqrestore(&q->lock, flags);
1146}
1147
1148#endif
1149
1150void complete(struct completion *x)
1151{
1152 unsigned long flags;
1153
1154 spin_lock_irqsave(&x->wait.lock, flags);
1155 x->done++;
1156 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
1157 spin_unlock_irqrestore(&x->wait.lock, flags);
1158}
1159
1160void wait_for_completion(struct completion *x)
1161{
1162 spin_lock_irq(&x->wait.lock);
1163 if (!x->done) {
1164 DECLARE_WAITQUEUE(wait, current);
1165
1166 wait.flags |= WQ_FLAG_EXCLUSIVE;
1167 __add_wait_queue_tail(&x->wait, &wait);
1168 do {
1169 __set_current_state(TASK_UNINTERRUPTIBLE);
1170 spin_unlock_irq(&x->wait.lock);
1171 schedule();
1172 spin_lock_irq(&x->wait.lock);
1173 } while (!x->done);
1174 __remove_wait_queue(&x->wait, &wait);
1175 }
1176 x->done--;
1177 spin_unlock_irq(&x->wait.lock);
1178}
1179
1180#define SLEEP_ON_VAR \
1181 unsigned long flags; \
1182 wait_queue_t wait; \
1183 init_waitqueue_entry(&wait, current);
1184
1185#define SLEEP_ON_HEAD \
1186 spin_lock_irqsave(&q->lock,flags); \
1187 __add_wait_queue(q, &wait); \
1188 spin_unlock(&q->lock);
1189
1190#define SLEEP_ON_TAIL \
1191 spin_lock_irq(&q->lock); \
1192 __remove_wait_queue(q, &wait); \
1193 spin_unlock_irqrestore(&q->lock, flags);
1194
1195void interruptible_sleep_on(wait_queue_head_t *q)
1196{
1197 SLEEP_ON_VAR
1198
1199 current->state = TASK_INTERRUPTIBLE;
1200
1201 SLEEP_ON_HEAD
1202 schedule();
1203 SLEEP_ON_TAIL
1204}
1205
1206long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
1207{
1208 SLEEP_ON_VAR
1209
1210 current->state = TASK_INTERRUPTIBLE;
1211
1212 SLEEP_ON_HEAD
1213 timeout = schedule_timeout(timeout);
1214 SLEEP_ON_TAIL
1215
1216 return timeout;
1217}
1218
1219void sleep_on(wait_queue_head_t *q)
1220{
1221 SLEEP_ON_VAR
1222
1223 current->state = TASK_UNINTERRUPTIBLE;
1224
1225 SLEEP_ON_HEAD
1226 schedule();
1227 SLEEP_ON_TAIL
1228}
1229
1230long sleep_on_timeout(wait_queue_head_t *q, long timeout)
1231{
1232 SLEEP_ON_VAR
1233
1234 current->state = TASK_UNINTERRUPTIBLE;
1235
1236 SLEEP_ON_HEAD
1237 timeout = schedule_timeout(timeout);
1238 SLEEP_ON_TAIL
1239
1240 return timeout;
1241}
1242
1243void scheduling_functions_end_here(void) { }
1244
1245void set_user_nice(task_t *p, long nice)
1246{
1247 unsigned long flags;
1248 prio_array_t *array;
1249 runqueue_t *rq;
1250
1251 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
1252 return;
1253
1254
1255
1256
1257 rq = task_rq_lock(p, &flags);
1258 if (rt_task(p)) {
1259 p->static_prio = NICE_TO_PRIO(nice);
1260 goto out_unlock;
1261 }
1262 array = p->array;
1263 if (array)
1264 dequeue_task(p, array);
1265 p->static_prio = NICE_TO_PRIO(nice);
1266 p->prio = NICE_TO_PRIO(nice);
1267 if (array) {
1268 enqueue_task(p, array);
1269
1270
1271
1272
1273 if ((NICE_TO_PRIO(nice) < p->static_prio) ||
1274 task_running(rq, p))
1275 resched_task(rq->curr);
1276 }
1277out_unlock:
1278 task_rq_unlock(rq, &flags);
1279}
1280
1281#ifndef __alpha__
1282
1283
1284
1285
1286
1287
1288
1289
1290asmlinkage long sys_nice(int increment)
1291{
1292 int retval;
1293 long nice;
1294
1295
1296
1297
1298
1299
1300 if (increment < 0) {
1301 if (!capable(CAP_SYS_NICE))
1302 return -EPERM;
1303 if (increment < -40)
1304 increment = -40;
1305 }
1306 if (increment > 40)
1307 increment = 40;
1308
1309 nice = PRIO_TO_NICE(current->static_prio) + increment;
1310 if (nice < -20)
1311 nice = -20;
1312 if (nice > 19)
1313 nice = 19;
1314
1315 retval = security_ops->task_setnice(current, nice);
1316 if (retval)
1317 return retval;
1318
1319 set_user_nice(current, nice);
1320 return 0;
1321}
1322
1323#endif
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333int task_prio(task_t *p)
1334{
1335 return p->prio - MAX_USER_RT_PRIO;
1336}
1337
1338
1339
1340
1341
1342int task_nice(task_t *p)
1343{
1344 return TASK_NICE(p);
1345}
1346
1347
1348
1349
1350
1351int task_curr(task_t *p)
1352{
1353 return cpu_curr(task_cpu(p)) == p;
1354}
1355
1356
1357
1358
1359
1360int idle_cpu(int cpu)
1361{
1362 return cpu_curr(cpu) == cpu_rq(cpu)->idle;
1363}
1364
1365
1366
1367
1368
1369static inline task_t *find_process_by_pid(pid_t pid)
1370{
1371 return pid ? find_task_by_pid(pid) : current;
1372}
1373
1374
1375
1376
1377static int setscheduler(pid_t pid, int policy, struct sched_param *param)
1378{
1379 struct sched_param lp;
1380 int retval = -EINVAL;
1381 prio_array_t *array;
1382 unsigned long flags;
1383 runqueue_t *rq;
1384 task_t *p;
1385
1386 if (!param || pid < 0)
1387 goto out_nounlock;
1388
1389 retval = -EFAULT;
1390 if (copy_from_user(&lp, param, sizeof(struct sched_param)))
1391 goto out_nounlock;
1392
1393
1394
1395
1396 read_lock_irq(&tasklist_lock);
1397
1398 p = find_process_by_pid(pid);
1399
1400 retval = -ESRCH;
1401 if (!p)
1402 goto out_unlock_tasklist;
1403
1404
1405
1406
1407
1408 rq = task_rq_lock(p, &flags);
1409
1410 if (policy < 0)
1411 policy = p->policy;
1412 else {
1413 retval = -EINVAL;
1414 if (policy != SCHED_FIFO && policy != SCHED_RR &&
1415 policy != SCHED_NORMAL)
1416 goto out_unlock;
1417 }
1418
1419
1420
1421
1422
1423 retval = -EINVAL;
1424 if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
1425 goto out_unlock;
1426 if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
1427 goto out_unlock;
1428
1429 retval = -EPERM;
1430 if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
1431 !capable(CAP_SYS_NICE))
1432 goto out_unlock;
1433 if ((current->euid != p->euid) && (current->euid != p->uid) &&
1434 !capable(CAP_SYS_NICE))
1435 goto out_unlock;
1436
1437 retval = security_ops->task_setscheduler(p, policy, &lp);
1438 if (retval)
1439 goto out_unlock;
1440
1441 array = p->array;
1442 if (array)
1443 deactivate_task(p, task_rq(p));
1444 retval = 0;
1445 p->policy = policy;
1446 p->rt_priority = lp.sched_priority;
1447 if (policy != SCHED_NORMAL)
1448 p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
1449 else
1450 p->prio = p->static_prio;
1451 if (array)
1452 activate_task(p, task_rq(p));
1453
1454out_unlock:
1455 task_rq_unlock(rq, &flags);
1456out_unlock_tasklist:
1457 read_unlock_irq(&tasklist_lock);
1458
1459out_nounlock:
1460 return retval;
1461}
1462
1463
1464
1465
1466
1467
1468
1469asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
1470 struct sched_param *param)
1471{
1472 return setscheduler(pid, policy, param);
1473}
1474
1475
1476
1477
1478
1479
1480asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param)
1481{
1482 return setscheduler(pid, -1, param);
1483}
1484
1485
1486
1487
1488
1489asmlinkage long sys_sched_getscheduler(pid_t pid)
1490{
1491 int retval = -EINVAL;
1492 task_t *p;
1493
1494 if (pid < 0)
1495 goto out_nounlock;
1496
1497 retval = -ESRCH;
1498 read_lock(&tasklist_lock);
1499 p = find_process_by_pid(pid);
1500 if (p) {
1501 retval = security_ops->task_getscheduler(p);
1502 if (!retval)
1503 retval = p->policy;
1504 }
1505 read_unlock(&tasklist_lock);
1506
1507out_nounlock:
1508 return retval;
1509}
1510
1511
1512
1513
1514
1515
1516asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
1517{
1518 struct sched_param lp;
1519 int retval = -EINVAL;
1520 task_t *p;
1521
1522 if (!param || pid < 0)
1523 goto out_nounlock;
1524
1525 read_lock(&tasklist_lock);
1526 p = find_process_by_pid(pid);
1527 retval = -ESRCH;
1528 if (!p)
1529 goto out_unlock;
1530
1531 retval = security_ops->task_getscheduler(p);
1532 if (retval)
1533 goto out_unlock;
1534
1535 lp.sched_priority = p->rt_priority;
1536 read_unlock(&tasklist_lock);
1537
1538
1539
1540
1541 retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
1542
1543out_nounlock:
1544 return retval;
1545
1546out_unlock:
1547 read_unlock(&tasklist_lock);
1548 return retval;
1549}
1550
1551
1552
1553
1554
1555
1556
1557asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len,
1558 unsigned long *user_mask_ptr)
1559{
1560 unsigned long new_mask;
1561 int retval;
1562 task_t *p;
1563
1564 if (len < sizeof(new_mask))
1565 return -EINVAL;
1566
1567 if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
1568 return -EFAULT;
1569
1570 new_mask &= cpu_online_map;
1571 if (!new_mask)
1572 return -EINVAL;
1573
1574 read_lock(&tasklist_lock);
1575
1576 p = find_process_by_pid(pid);
1577 if (!p) {
1578 read_unlock(&tasklist_lock);
1579 return -ESRCH;
1580 }
1581
1582
1583
1584
1585
1586
1587 get_task_struct(p);
1588 read_unlock(&tasklist_lock);
1589
1590 retval = -EPERM;
1591 if ((current->euid != p->euid) && (current->euid != p->uid) &&
1592 !capable(CAP_SYS_NICE))
1593 goto out_unlock;
1594
1595 retval = 0;
1596 set_cpus_allowed(p, new_mask);
1597
1598out_unlock:
1599 put_task_struct(p);
1600 return retval;
1601}
1602
1603
1604
1605
1606
1607
1608
1609asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len,
1610 unsigned long *user_mask_ptr)
1611{
1612 unsigned int real_len;
1613 unsigned long mask;
1614 int retval;
1615 task_t *p;
1616
1617 real_len = sizeof(mask);
1618 if (len < real_len)
1619 return -EINVAL;
1620
1621 read_lock(&tasklist_lock);
1622
1623 retval = -ESRCH;
1624 p = find_process_by_pid(pid);
1625 if (!p)
1626 goto out_unlock;
1627
1628 retval = 0;
1629 mask = p->cpus_allowed & cpu_online_map;
1630
1631out_unlock:
1632 read_unlock(&tasklist_lock);
1633 if (retval)
1634 return retval;
1635 if (copy_to_user(user_mask_ptr, &mask, real_len))
1636 return -EFAULT;
1637 return real_len;
1638}
1639
1640
1641
1642
1643
1644
1645
1646
1647asmlinkage long sys_sched_yield(void)
1648{
1649 runqueue_t *rq = this_rq_lock();
1650 prio_array_t *array = current->array;
1651
1652
1653
1654
1655
1656
1657
1658
1659 if (likely(!rt_task(current))) {
1660 dequeue_task(current, array);
1661 enqueue_task(current, rq->expired);
1662 } else {
1663 list_del(¤t->run_list);
1664 list_add_tail(¤t->run_list, array->queue + current->prio);
1665 }
1666
1667
1668
1669
1670 _raw_spin_unlock(&rq->lock);
1671 preempt_enable_no_resched();
1672
1673 schedule();
1674
1675 return 0;
1676}
1677
1678void __cond_resched(void)
1679{
1680 set_current_state(TASK_RUNNING);
1681 schedule();
1682}
1683
1684
1685
1686
1687
1688
1689
1690void yield(void)
1691{
1692 set_current_state(TASK_RUNNING);
1693 sys_sched_yield();
1694}
1695
1696
1697
1698
1699
1700
1701
1702
1703asmlinkage long sys_sched_get_priority_max(int policy)
1704{
1705 int ret = -EINVAL;
1706
1707 switch (policy) {
1708 case SCHED_FIFO:
1709 case SCHED_RR:
1710 ret = MAX_USER_RT_PRIO-1;
1711 break;
1712 case SCHED_NORMAL:
1713 ret = 0;
1714 break;
1715 }
1716 return ret;
1717}
1718
1719
1720
1721
1722
1723
1724
1725
1726asmlinkage long sys_sched_get_priority_min(int policy)
1727{
1728 int ret = -EINVAL;
1729
1730 switch (policy) {
1731 case SCHED_FIFO:
1732 case SCHED_RR:
1733 ret = 1;
1734 break;
1735 case SCHED_NORMAL:
1736 ret = 0;
1737 }
1738 return ret;
1739}
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
1750{
1751 int retval = -EINVAL;
1752 struct timespec t;
1753 task_t *p;
1754
1755 if (pid < 0)
1756 goto out_nounlock;
1757
1758 retval = -ESRCH;
1759 read_lock(&tasklist_lock);
1760 p = find_process_by_pid(pid);
1761 if (!p)
1762 goto out_unlock;
1763
1764 retval = security_ops->task_getscheduler(p);
1765 if (retval)
1766 goto out_unlock;
1767
1768 jiffies_to_timespec(p->policy & SCHED_FIFO ?
1769 0 : task_timeslice(p), &t);
1770 read_unlock(&tasklist_lock);
1771 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
1772out_nounlock:
1773 return retval;
1774out_unlock:
1775 read_unlock(&tasklist_lock);
1776 return retval;
1777}
1778
1779static void show_task(task_t * p)
1780{
1781 unsigned long free = 0;
1782 task_t *relative;
1783 int state;
1784 static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1785
1786 printk("%-13.13s ", p->comm);
1787 state = p->state ? __ffs(p->state) + 1 : 0;
1788 if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
1789 printk(stat_nam[state]);
1790 else
1791 printk(" ");
1792#if (BITS_PER_LONG == 32)
1793 if (p == current)
1794 printk(" current ");
1795 else
1796 printk(" %08lX ", thread_saved_pc(p));
1797#else
1798 if (p == current)
1799 printk(" current task ");
1800 else
1801 printk(" %016lx ", thread_saved_pc(p));
1802#endif
1803 {
1804 unsigned long * n = (unsigned long *) (p+1);
1805 while (!*n)
1806 n++;
1807 free = (unsigned long) n - (unsigned long)(p+1);
1808 }
1809 printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
1810 if ((relative = eldest_child(p)))
1811 printk("%5d ", relative->pid);
1812 else
1813 printk(" ");
1814 if ((relative = younger_sibling(p)))
1815 printk("%7d", relative->pid);
1816 else
1817 printk(" ");
1818 if ((relative = older_sibling(p)))
1819 printk(" %5d", relative->pid);
1820 else
1821 printk(" ");
1822 if (!p->mm)
1823 printk(" (L-TLB)\n");
1824 else
1825 printk(" (NOTLB)\n");
1826
1827 {
1828 extern void show_trace_task(task_t *tsk);
1829 show_trace_task(p);
1830 }
1831}
1832
1833char * render_sigset_t(sigset_t *set, char *buffer)
1834{
1835 int i = _NSIG, x;
1836 do {
1837 i -= 4, x = 0;
1838 if (sigismember(set, i+1)) x |= 1;
1839 if (sigismember(set, i+2)) x |= 2;
1840 if (sigismember(set, i+3)) x |= 4;
1841 if (sigismember(set, i+4)) x |= 8;
1842 *buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
1843 } while (i >= 4);
1844 *buffer = 0;
1845 return buffer;
1846}
1847
1848void show_state(void)
1849{
1850 task_t *g, *p;
1851
1852#if (BITS_PER_LONG == 32)
1853 printk("\n"
1854 " free sibling\n");
1855 printk(" task PC stack pid father child younger older\n");
1856#else
1857 printk("\n"
1858 " free sibling\n");
1859 printk(" task PC stack pid father child younger older\n");
1860#endif
1861 read_lock(&tasklist_lock);
1862 do_each_thread(g, p) {
1863
1864
1865
1866
1867 touch_nmi_watchdog();
1868 show_task(p);
1869 } while_each_thread(g, p);
1870
1871 read_unlock(&tasklist_lock);
1872}
1873
1874void __init init_idle(task_t *idle, int cpu)
1875{
1876 runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle));
1877 unsigned long flags;
1878
1879 local_irq_save(flags);
1880 double_rq_lock(idle_rq, rq);
1881
1882 idle_rq->curr = idle_rq->idle = idle;
1883 deactivate_task(idle, rq);
1884 idle->array = NULL;
1885 idle->prio = MAX_PRIO;
1886 idle->state = TASK_RUNNING;
1887 set_task_cpu(idle, cpu);
1888 double_rq_unlock(idle_rq, rq);
1889 set_tsk_need_resched(idle);
1890 local_irq_restore(flags);
1891
1892
1893#if CONFIG_PREEMPT
1894 idle->thread_info->preempt_count = (idle->lock_depth >= 0);
1895#else
1896 idle->thread_info->preempt_count = 0;
1897#endif
1898}
1899
1900#if CONFIG_SMP
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917typedef struct {
1918 struct list_head list;
1919 task_t *task;
1920 struct completion done;
1921} migration_req_t;
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932void set_cpus_allowed(task_t *p, unsigned long new_mask)
1933{
1934 unsigned long flags;
1935 migration_req_t req;
1936 runqueue_t *rq;
1937
1938#if 0
1939 new_mask &= cpu_online_map;
1940 if (!new_mask)
1941 BUG();
1942#endif
1943
1944 preempt_disable();
1945 rq = task_rq_lock(p, &flags);
1946 p->cpus_allowed = new_mask;
1947
1948
1949
1950
1951 if (new_mask & (1UL << task_cpu(p))) {
1952 task_rq_unlock(rq, &flags);
1953 goto out;
1954 }
1955
1956
1957
1958
1959 if (!p->array && !task_running(rq, p)) {
1960 set_task_cpu(p, __ffs(p->cpus_allowed));
1961 task_rq_unlock(rq, &flags);
1962 goto out;
1963 }
1964 init_completion(&req.done);
1965 req.task = p;
1966 list_add(&req.list, &rq->migration_queue);
1967 task_rq_unlock(rq, &flags);
1968 wake_up_process(rq->migration_thread);
1969
1970 wait_for_completion(&req.done);
1971out:
1972 preempt_enable();
1973}
1974
1975
1976
1977
1978
1979static int migration_thread(void * data)
1980{
1981 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
1982 int cpu = (long) data;
1983 runqueue_t *rq;
1984 int ret;
1985
1986 daemonize();
1987 sigfillset(¤t->blocked);
1988 set_fs(KERNEL_DS);
1989
1990 set_cpus_allowed(current, 1UL << cpu);
1991
1992
1993
1994
1995
1996
1997
1998 if (smp_processor_id() != cpu)
1999 printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
2000 ret = setscheduler(0, SCHED_FIFO, ¶m);
2001
2002 rq = this_rq();
2003 rq->migration_thread = current;
2004
2005 sprintf(current->comm, "migration_CPU%d", smp_processor_id());
2006
2007 for (;;) {
2008 runqueue_t *rq_src, *rq_dest;
2009 struct list_head *head;
2010 int cpu_src, cpu_dest;
2011 migration_req_t *req;
2012 unsigned long flags;
2013 task_t *p;
2014
2015 spin_lock_irqsave(&rq->lock, flags);
2016 head = &rq->migration_queue;
2017 current->state = TASK_INTERRUPTIBLE;
2018 if (list_empty(head)) {
2019 spin_unlock_irqrestore(&rq->lock, flags);
2020 schedule();
2021 continue;
2022 }
2023 req = list_entry(head->next, migration_req_t, list);
2024 list_del_init(head->next);
2025 spin_unlock_irqrestore(&rq->lock, flags);
2026
2027 p = req->task;
2028 cpu_dest = __ffs(p->cpus_allowed);
2029 rq_dest = cpu_rq(cpu_dest);
2030repeat:
2031 cpu_src = task_cpu(p);
2032 rq_src = cpu_rq(cpu_src);
2033
2034 local_irq_save(flags);
2035 double_rq_lock(rq_src, rq_dest);
2036 if (task_cpu(p) != cpu_src) {
2037 double_rq_unlock(rq_src, rq_dest);
2038 local_irq_restore(flags);
2039 goto repeat;
2040 }
2041 if (rq_src == rq) {
2042 set_task_cpu(p, cpu_dest);
2043 if (p->array) {
2044 deactivate_task(p, rq_src);
2045 activate_task(p, rq_dest);
2046 }
2047 }
2048 double_rq_unlock(rq_src, rq_dest);
2049 local_irq_restore(flags);
2050
2051 complete(&req->done);
2052 }
2053}
2054
2055
2056
2057
2058
2059static int migration_call(struct notifier_block *nfb,
2060 unsigned long action,
2061 void *hcpu)
2062{
2063 switch (action) {
2064 case CPU_ONLINE:
2065 printk("Starting migration thread for cpu %li\n",
2066 (long)hcpu);
2067 kernel_thread(migration_thread, hcpu, CLONE_KERNEL);
2068 while (!cpu_rq((long)hcpu)->migration_thread)
2069 yield();
2070 break;
2071 }
2072 return NOTIFY_OK;
2073}
2074
2075static struct notifier_block migration_notifier = { &migration_call, NULL, 0 };
2076
2077__init int migration_init(void)
2078{
2079
2080 migration_call(&migration_notifier, CPU_ONLINE,
2081 (void *)(long)smp_processor_id());
2082 register_cpu_notifier(&migration_notifier);
2083 return 0;
2084}
2085
2086#endif
2087
2088#if CONFIG_SMP || CONFIG_PREEMPT
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
2100#endif
2101
2102extern void init_timervecs(void);
2103extern void timer_bh(void);
2104extern void tqueue_bh(void);
2105extern void immediate_bh(void);
2106
2107void __init sched_init(void)
2108{
2109 runqueue_t *rq;
2110 int i, j, k;
2111
2112 for (i = 0; i < NR_CPUS; i++) {
2113 prio_array_t *array;
2114
2115 rq = cpu_rq(i);
2116 rq->active = rq->arrays;
2117 rq->expired = rq->arrays + 1;
2118 spin_lock_init(&rq->lock);
2119 INIT_LIST_HEAD(&rq->migration_queue);
2120
2121 for (j = 0; j < 2; j++) {
2122 array = rq->arrays + j;
2123 for (k = 0; k < MAX_PRIO; k++) {
2124 INIT_LIST_HEAD(array->queue + k);
2125 __clear_bit(k, array->bitmap);
2126 }
2127
2128 __set_bit(MAX_PRIO, array->bitmap);
2129 }
2130 }
2131
2132
2133
2134
2135 rq = this_rq();
2136 rq->curr = current;
2137 rq->idle = current;
2138 set_task_cpu(current, smp_processor_id());
2139 wake_up_process(current);
2140
2141 init_timervecs();
2142 init_bh(TIMER_BH, timer_bh);
2143 init_bh(TQUEUE_BH, tqueue_bh);
2144 init_bh(IMMEDIATE_BH, immediate_bh);
2145
2146
2147
2148
2149 atomic_inc(&init_mm.mm_count);
2150 enter_lazy_tlb(&init_mm, current, smp_processor_id());
2151}
2152
2153