1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/mm.h>
30#include <linux/module.h>
31#include <linux/nmi.h>
32#include <linux/init.h>
33#include <linux/uaccess.h>
34#include <linux/highmem.h>
35#include <asm/mmu_context.h>
36#include <linux/interrupt.h>
37#include <linux/capability.h>
38#include <linux/completion.h>
39#include <linux/kernel_stat.h>
40#include <linux/debug_locks.h>
41#include <linux/perf_event.h>
42#include <linux/security.h>
43#include <linux/notifier.h>
44#include <linux/profile.h>
45#include <linux/freezer.h>
46#include <linux/vmalloc.h>
47#include <linux/blkdev.h>
48#include <linux/delay.h>
49#include <linux/pid_namespace.h>
50#include <linux/smp.h>
51#include <linux/threads.h>
52#include <linux/timer.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h>
55#include <linux/cpuset.h>
56#include <linux/percpu.h>
57#include <linux/proc_fs.h>
58#include <linux/seq_file.h>
59#include <linux/sysctl.h>
60#include <linux/syscalls.h>
61#include <linux/times.h>
62#include <linux/tsacct_kern.h>
63#include <linux/kprobes.h>
64#include <linux/delayacct.h>
65#include <linux/unistd.h>
66#include <linux/pagemap.h>
67#include <linux/hrtimer.h>
68#include <linux/tick.h>
69#include <linux/debugfs.h>
70#include <linux/ctype.h>
71#include <linux/ftrace.h>
72#include <linux/slab.h>
73#include <linux/init_task.h>
74#include <linux/binfmts.h>
75
76#include <asm/switch_to.h>
77#include <asm/tlb.h>
78#include <asm/irq_regs.h>
79#include <asm/mutex.h>
80#ifdef CONFIG_PARAVIRT
81#include <asm/paravirt.h>
82#endif
83
84#include "sched.h"
85#include "../workqueue_sched.h"
86#include "../smpboot.h"
87
88#define CREATE_TRACE_POINTS
89#include <trace/events/sched.h>
90
91void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
92{
93 unsigned long delta;
94 ktime_t soft, hard, now;
95
96 for (;;) {
97 if (hrtimer_active(period_timer))
98 break;
99
100 now = hrtimer_cb_get_time(period_timer);
101 hrtimer_forward(period_timer, now, period);
102
103 soft = hrtimer_get_softexpires(period_timer);
104 hard = hrtimer_get_expires(period_timer);
105 delta = ktime_to_ns(ktime_sub(hard, soft));
106 __hrtimer_start_range_ns(period_timer, soft, delta,
107 HRTIMER_MODE_ABS_PINNED, 0);
108 }
109}
110
/* Mutex for the sched-domain data; its users are outside this part of the file. */
DEFINE_MUTEX(sched_domains_mutex);
/* One struct rq per CPU. */
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

/* Defined further down; declared here because update_rq_clock() calls it. */
static void update_rq_clock_task(struct rq *rq, s64 delta);
115
116void update_rq_clock(struct rq *rq)
117{
118 s64 delta;
119
120 if (rq->skip_clock_update > 0)
121 return;
122
123 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
124 rq->clock += delta;
125 update_rq_clock_task(rq, delta);
126}
127
128
129
130
131
/*
 * Default feature mask.  features.h is included with SCHED_FEAT() defined so
 * that every SCHED_FEAT(name, enabled) entry expands to
 * "(1UL << __SCHED_FEAT_name) * enabled |"; the trailing 0 closes the OR
 * chain.  (features.h itself is not visible here; it is presumed to consist
 * solely of SCHED_FEAT() entries.)
 */
#define SCHED_FEAT(name, enabled) \
	(1UL << __SCHED_FEAT_##name) * enabled |

const_debug unsigned int sysctl_sched_features =
#include "features.h"
	0;

#undef SCHED_FEAT
140
141#ifdef CONFIG_SCHED_DEBUG
/*
 * Feature name table: with this SCHED_FEAT() definition each entry included
 * from features.h is stringified into one array element.  The array is
 * indexed by feature bit number by sched_feat_show() and sched_feat_write().
 */
#define SCHED_FEAT(name, enabled) \
	#name ,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT
150
151static int sched_feat_show(struct seq_file *m, void *v)
152{
153 int i;
154
155 for (i = 0; i < __SCHED_FEAT_NR; i++) {
156 if (!(sysctl_sched_features & (1UL << i)))
157 seq_puts(m, "NO_");
158 seq_printf(m, "%s ", sched_feat_names[i]);
159 }
160 seq_puts(m, "\n");
161
162 return 0;
163}
164
165#ifdef HAVE_JUMP_LABEL
166
/*
 * Static-key initialisers selected by token pasting: SCHED_FEAT(name, true)
 * becomes STATIC_KEY_INIT_TRUE and SCHED_FEAT(name, false) becomes
 * STATIC_KEY_INIT_FALSE.
 */
#define jump_label_key__true STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled) \
	jump_label_key__##enabled ,

/* One static key per scheduler feature, filled from the features.h entries. */
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT
178
179static void sched_feat_disable(int i)
180{
181 if (static_key_enabled(&sched_feat_keys[i]))
182 static_key_slow_dec(&sched_feat_keys[i]);
183}
184
185static void sched_feat_enable(int i)
186{
187 if (!static_key_enabled(&sched_feat_keys[i]))
188 static_key_slow_inc(&sched_feat_keys[i]);
189}
190#else
/*
 * Without HAVE_JUMP_LABEL there are no static keys to flip, so both helpers
 * are no-ops; sched_feat_write() still updates sysctl_sched_features itself.
 * (The stray ';' that followed each body was an empty file-scope declaration
 * and has been dropped.)
 */
static void sched_feat_disable(int i) { }
static void sched_feat_enable(int i) { }
193#endif
194
195static ssize_t
196sched_feat_write(struct file *filp, const char __user *ubuf,
197 size_t cnt, loff_t *ppos)
198{
199 char buf[64];
200 char *cmp;
201 int neg = 0;
202 int i;
203
204 if (cnt > 63)
205 cnt = 63;
206
207 if (copy_from_user(&buf, ubuf, cnt))
208 return -EFAULT;
209
210 buf[cnt] = 0;
211 cmp = strstrip(buf);
212
213 if (strncmp(cmp, "NO_", 3) == 0) {
214 neg = 1;
215 cmp += 3;
216 }
217
218 for (i = 0; i < __SCHED_FEAT_NR; i++) {
219 if (strcmp(cmp, sched_feat_names[i]) == 0) {
220 if (neg) {
221 sysctl_sched_features &= ~(1UL << i);
222 sched_feat_disable(i);
223 } else {
224 sysctl_sched_features |= (1UL << i);
225 sched_feat_enable(i);
226 }
227 break;
228 }
229 }
230
231 if (i == __SCHED_FEAT_NR)
232 return -EINVAL;
233
234 *ppos += cnt;
235
236 return cnt;
237}
238
239static int sched_feat_open(struct inode *inode, struct file *filp)
240{
241 return single_open(filp, sched_feat_show, NULL);
242}
243
244static const struct file_operations sched_feat_fops = {
245 .open = sched_feat_open,
246 .write = sched_feat_write,
247 .read = seq_read,
248 .llseek = seq_lseek,
249 .release = single_release,
250};
251
252static __init int sched_init_debug(void)
253{
254 debugfs_create_file("sched_features", 0644, NULL, NULL,
255 &sched_feat_fops);
256
257 return 0;
258}
259late_initcall(sched_init_debug);
260#endif
261
262
263
264
265
/* NOTE(review): presumably a cap on tasks migrated per balancing pass; not used in this part of the file, confirm. */
const_debug unsigned int sysctl_sched_nr_migrate = 32;

/* NOTE(review): initialised to MSEC_PER_SEC, so presumably expressed in milliseconds; confirm against users. */
const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;

/* NOTE(review): RT period; the value suggests microseconds (1 s), confirm. */
unsigned int sysctl_sched_rt_period = 1000000;

/* Flag variable; it is neither read nor written in this part of the file. */
__read_mostly int scheduler_running;

/* NOTE(review): RT runtime in the same unit as sysctl_sched_rt_period (0.95 of the period by default), confirm. */
int sysctl_sched_rt_runtime = 950000;
289
290
291
292
293
294
295static inline struct rq *__task_rq_lock(struct task_struct *p)
296 __acquires(rq->lock)
297{
298 struct rq *rq;
299
300 lockdep_assert_held(&p->pi_lock);
301
302 for (;;) {
303 rq = task_rq(p);
304 raw_spin_lock(&rq->lock);
305 if (likely(rq == task_rq(p)))
306 return rq;
307 raw_spin_unlock(&rq->lock);
308 }
309}
310
311
312
313
314static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
315 __acquires(p->pi_lock)
316 __acquires(rq->lock)
317{
318 struct rq *rq;
319
320 for (;;) {
321 raw_spin_lock_irqsave(&p->pi_lock, *flags);
322 rq = task_rq(p);
323 raw_spin_lock(&rq->lock);
324 if (likely(rq == task_rq(p)))
325 return rq;
326 raw_spin_unlock(&rq->lock);
327 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
328 }
329}
330
331static void __task_rq_unlock(struct rq *rq)
332 __releases(rq->lock)
333{
334 raw_spin_unlock(&rq->lock);
335}
336
337static inline void
338task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
339 __releases(rq->lock)
340 __releases(p->pi_lock)
341{
342 raw_spin_unlock(&rq->lock);
343 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
344}
345
346
347
348
349static struct rq *this_rq_lock(void)
350 __acquires(rq->lock)
351{
352 struct rq *rq;
353
354 local_irq_disable();
355 rq = this_rq();
356 raw_spin_lock(&rq->lock);
357
358 return rq;
359}
360
361#ifdef CONFIG_SCHED_HRTICK
362
363
364
365
366
367
368
369
370
371
372
373static void hrtick_clear(struct rq *rq)
374{
375 if (hrtimer_active(&rq->hrtick_timer))
376 hrtimer_cancel(&rq->hrtick_timer);
377}
378
379
380
381
382
383static enum hrtimer_restart hrtick(struct hrtimer *timer)
384{
385 struct rq *rq = container_of(timer, struct rq, hrtick_timer);
386
387 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
388
389 raw_spin_lock(&rq->lock);
390 update_rq_clock(rq);
391 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
392 raw_spin_unlock(&rq->lock);
393
394 return HRTIMER_NORESTART;
395}
396
397#ifdef CONFIG_SMP
398
399
400
401static void __hrtick_start(void *arg)
402{
403 struct rq *rq = arg;
404
405 raw_spin_lock(&rq->lock);
406 hrtimer_restart(&rq->hrtick_timer);
407 rq->hrtick_csd_pending = 0;
408 raw_spin_unlock(&rq->lock);
409}
410
411
412
413
414
415
416void hrtick_start(struct rq *rq, u64 delay)
417{
418 struct hrtimer *timer = &rq->hrtick_timer;
419 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
420
421 hrtimer_set_expires(timer, time);
422
423 if (rq == this_rq()) {
424 hrtimer_restart(timer);
425 } else if (!rq->hrtick_csd_pending) {
426 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
427 rq->hrtick_csd_pending = 1;
428 }
429}
430
431static int
432hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
433{
434 int cpu = (int)(long)hcpu;
435
436 switch (action) {
437 case CPU_UP_CANCELED:
438 case CPU_UP_CANCELED_FROZEN:
439 case CPU_DOWN_PREPARE:
440 case CPU_DOWN_PREPARE_FROZEN:
441 case CPU_DEAD:
442 case CPU_DEAD_FROZEN:
443 hrtick_clear(cpu_rq(cpu));
444 return NOTIFY_OK;
445 }
446
447 return NOTIFY_DONE;
448}
449
450static __init void init_hrtick(void)
451{
452 hotcpu_notifier(hotplug_hrtick, 0);
453}
454#else
455
456
457
458
459
460void hrtick_start(struct rq *rq, u64 delay)
461{
462 __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
463 HRTIMER_MODE_REL_PINNED, 0);
464}
465
/* UP: there is no hotplug notifier to register, so this is a no-op. */
static inline void init_hrtick(void) { }
469#endif
470
471static void init_rq_hrtick(struct rq *rq)
472{
473#ifdef CONFIG_SMP
474 rq->hrtick_csd_pending = 0;
475
476 rq->hrtick_csd.flags = 0;
477 rq->hrtick_csd.func = __hrtick_start;
478 rq->hrtick_csd.info = rq;
479#endif
480
481 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
482 rq->hrtick_timer.function = hrtick;
483}
484#else
/* !CONFIG_SCHED_HRTICK: the hrtick helpers compile down to empty stubs. */
static inline void hrtick_clear(struct rq *rq) { }

static inline void init_rq_hrtick(struct rq *rq) { }

static inline void init_hrtick(void) { }
496#endif
497
498
499
500
501
502
503
504
505#ifdef CONFIG_SMP
506
/* Fallback used when nothing else defined tsk_is_polling(): test the task's TIF_POLLING_NRFLAG thread flag. */
#ifndef tsk_is_polling
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif
510
511void resched_task(struct task_struct *p)
512{
513 int cpu;
514
515 assert_raw_spin_locked(&task_rq(p)->lock);
516
517 if (test_tsk_need_resched(p))
518 return;
519
520 set_tsk_need_resched(p);
521
522 cpu = task_cpu(p);
523 if (cpu == smp_processor_id())
524 return;
525
526
527 smp_mb();
528 if (!tsk_is_polling(p))
529 smp_send_reschedule(cpu);
530}
531
532void resched_cpu(int cpu)
533{
534 struct rq *rq = cpu_rq(cpu);
535 unsigned long flags;
536
537 if (!raw_spin_trylock_irqsave(&rq->lock, flags))
538 return;
539 resched_task(cpu_curr(cpu));
540 raw_spin_unlock_irqrestore(&rq->lock, flags);
541}
542
543#ifdef CONFIG_NO_HZ
544
545
546
547
548
549
550
551
552int get_nohz_timer_target(void)
553{
554 int cpu = smp_processor_id();
555 int i;
556 struct sched_domain *sd;
557
558 rcu_read_lock();
559 for_each_domain(cpu, sd) {
560 for_each_cpu(i, sched_domain_span(sd)) {
561 if (!idle_cpu(i)) {
562 cpu = i;
563 goto unlock;
564 }
565 }
566 }
567unlock:
568 rcu_read_unlock();
569 return cpu;
570}
571
572
573
574
575
576
577
578
579
580
581void wake_up_idle_cpu(int cpu)
582{
583 struct rq *rq = cpu_rq(cpu);
584
585 if (cpu == smp_processor_id())
586 return;
587
588
589
590
591
592
593
594
595 if (rq->curr != rq->idle)
596 return;
597
598
599
600
601
602
603 set_tsk_need_resched(rq->idle);
604
605
606 smp_mb();
607 if (!tsk_is_polling(rq->idle))
608 smp_send_reschedule(cpu);
609}
610
611static inline bool got_nohz_idle_kick(void)
612{
613 int cpu = smp_processor_id();
614 return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
615}
616
617#else
618
/* !CONFIG_NO_HZ: there are no nohz idle kicks, so always report false. */
static inline bool got_nohz_idle_kick(void)
{
	return false;
}
623
624#endif
625
626void sched_avg_update(struct rq *rq)
627{
628 s64 period = sched_avg_period();
629
630 while ((s64)(rq->clock - rq->age_stamp) > period) {
631
632
633
634
635
636 asm("" : "+rm" (rq->age_stamp));
637 rq->age_stamp += period;
638 rq->rt_avg /= 2;
639 }
640}
641
642#else
643void resched_task(struct task_struct *p)
644{
645 assert_raw_spin_locked(&task_rq(p)->lock);
646 set_tsk_need_resched(p);
647}
648#endif
649
650#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
651 (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
652
653
654
655
656
657
658int walk_tg_tree_from(struct task_group *from,
659 tg_visitor down, tg_visitor up, void *data)
660{
661 struct task_group *parent, *child;
662 int ret;
663
664 parent = from;
665
666down:
667 ret = (*down)(parent, data);
668 if (ret)
669 goto out;
670 list_for_each_entry_rcu(child, &parent->children, siblings) {
671 parent = child;
672 goto down;
673
674up:
675 continue;
676 }
677 ret = (*up)(parent, data);
678 if (ret || parent == from)
679 goto out;
680
681 child = parent;
682 parent = parent->parent;
683 if (parent)
684 goto up;
685out:
686 return ret;
687}
688
/* Visitor that does nothing and returns 0; usable as either callback of walk_tg_tree_from(). */
int tg_nop(struct task_group *tg, void *data)
{
	(void)tg;
	(void)data;

	return 0;
}
693#endif
694
695static void set_load_weight(struct task_struct *p)
696{
697 int prio = p->static_prio - MAX_RT_PRIO;
698 struct load_weight *load = &p->se.load;
699
700
701
702
703 if (p->policy == SCHED_IDLE) {
704 load->weight = scale_load(WEIGHT_IDLEPRIO);
705 load->inv_weight = WMULT_IDLEPRIO;
706 return;
707 }
708
709 load->weight = scale_load(prio_to_weight[prio]);
710 load->inv_weight = prio_to_wmult[prio];
711}
712
713static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
714{
715 update_rq_clock(rq);
716 sched_info_queued(p);
717 p->sched_class->enqueue_task(rq, p, flags);
718}
719
720static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
721{
722 update_rq_clock(rq);
723 sched_info_dequeued(p);
724 p->sched_class->dequeue_task(rq, p, flags);
725}
726
727void activate_task(struct rq *rq, struct task_struct *p, int flags)
728{
729 if (task_contributes_to_load(p))
730 rq->nr_uninterruptible--;
731
732 enqueue_task(rq, p, flags);
733}
734
735void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
736{
737 if (task_contributes_to_load(p))
738 rq->nr_uninterruptible++;
739
740 dequeue_task(rq, p, flags);
741}
742
743#ifdef CONFIG_IRQ_TIME_ACCOUNTING
744
745
746
747
748
749
750
751
752
753
754
755
/* Per-CPU totals of time attributed to hard and soft interrupts by account_system_vtime(). */
static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);

/* Per-CPU sched_clock_cpu() value at the previous account_system_vtime() call. */
static DEFINE_PER_CPU(u64, irq_start_time);
/* Non-zero enables the accounting; toggled by enable/disable_sched_clock_irqtime(). */
static int sched_clock_irqtime;
761
762void enable_sched_clock_irqtime(void)
763{
764 sched_clock_irqtime = 1;
765}
766
767void disable_sched_clock_irqtime(void)
768{
769 sched_clock_irqtime = 0;
770}
771
772#ifndef CONFIG_64BIT
/* Per-CPU sequence counter (non-64-bit builds only) that lets irq_time_read() detect concurrent updates of the u64 totals. */
static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
774
775static inline void irq_time_write_begin(void)
776{
777 __this_cpu_inc(irq_time_seq.sequence);
778 smp_wmb();
779}
780
781static inline void irq_time_write_end(void)
782{
783 smp_wmb();
784 __this_cpu_inc(irq_time_seq.sequence);
785}
786
787static inline u64 irq_time_read(int cpu)
788{
789 u64 irq_time;
790 unsigned seq;
791
792 do {
793 seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
794 irq_time = per_cpu(cpu_softirq_time, cpu) +
795 per_cpu(cpu_hardirq_time, cpu);
796 } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
797
798 return irq_time;
799}
800#else
/* CONFIG_64BIT: no sequence counter is used, so the write-section markers are empty. */
static inline void irq_time_write_begin(void) { }

static inline void irq_time_write_end(void) { }
808
809static inline u64 irq_time_read(int cpu)
810{
811 return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
812}
813#endif
814
815
816
817
818
819void account_system_vtime(struct task_struct *curr)
820{
821 unsigned long flags;
822 s64 delta;
823 int cpu;
824
825 if (!sched_clock_irqtime)
826 return;
827
828 local_irq_save(flags);
829
830 cpu = smp_processor_id();
831 delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
832 __this_cpu_add(irq_start_time, delta);
833
834 irq_time_write_begin();
835
836
837
838
839
840
841 if (hardirq_count())
842 __this_cpu_add(cpu_hardirq_time, delta);
843 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
844 __this_cpu_add(cpu_softirq_time, delta);
845
846 irq_time_write_end();
847 local_irq_restore(flags);
848}
849EXPORT_SYMBOL_GPL(account_system_vtime);
850
851#endif
852
853#ifdef CONFIG_PARAVIRT
854static inline u64 steal_ticks(u64 steal)
855{
856 if (unlikely(steal > NSEC_PER_SEC))
857 return div_u64(steal, TICK_NSEC);
858
859 return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
860}
861#endif
862
863static void update_rq_clock_task(struct rq *rq, s64 delta)
864{
865
866
867
868
869#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
870 s64 steal = 0, irq_delta = 0;
871#endif
872#ifdef CONFIG_IRQ_TIME_ACCOUNTING
873 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890 if (irq_delta > delta)
891 irq_delta = delta;
892
893 rq->prev_irq_time += irq_delta;
894 delta -= irq_delta;
895#endif
896#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
897 if (static_key_false((¶virt_steal_rq_enabled))) {
898 u64 st;
899
900 steal = paravirt_steal_clock(cpu_of(rq));
901 steal -= rq->prev_steal_time_rq;
902
903 if (unlikely(steal > delta))
904 steal = delta;
905
906 st = steal_ticks(steal);
907 steal = st * TICK_NSEC;
908
909 rq->prev_steal_time_rq += steal;
910
911 delta -= steal;
912 }
913#endif
914
915 rq->clock_task += delta;
916
917#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
918 if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
919 sched_rt_avg_update(rq, irq_delta + steal);
920#endif
921}
922
923#ifdef CONFIG_IRQ_TIME_ACCOUNTING
924static int irqtime_account_hi_update(void)
925{
926 u64 *cpustat = kcpustat_this_cpu->cpustat;
927 unsigned long flags;
928 u64 latest_ns;
929 int ret = 0;
930
931 local_irq_save(flags);
932 latest_ns = this_cpu_read(cpu_hardirq_time);
933 if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
934 ret = 1;
935 local_irq_restore(flags);
936 return ret;
937}
938
939static int irqtime_account_si_update(void)
940{
941 u64 *cpustat = kcpustat_this_cpu->cpustat;
942 unsigned long flags;
943 u64 latest_ns;
944 int ret = 0;
945
946 local_irq_save(flags);
947 latest_ns = this_cpu_read(cpu_softirq_time);
948 if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
949 ret = 1;
950 local_irq_restore(flags);
951 return ret;
952}
953
954#else
955
/* Without CONFIG_IRQ_TIME_ACCOUNTING the switch is a compile-time constant 0. */
#define sched_clock_irqtime (0)
957
958#endif
959
960void sched_set_stop_task(int cpu, struct task_struct *stop)
961{
962 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
963 struct task_struct *old_stop = cpu_rq(cpu)->stop;
964
965 if (stop) {
966
967
968
969
970
971
972
973
974 sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m);
975
976 stop->sched_class = &stop_sched_class;
977 }
978
979 cpu_rq(cpu)->stop = stop;
980
981 if (old_stop) {
982
983
984
985
986 old_stop->sched_class = &rt_sched_class;
987 }
988}
989
990
991
992
993static inline int __normal_prio(struct task_struct *p)
994{
995 return p->static_prio;
996}
997
998
999
1000
1001
1002
1003
1004
1005static inline int normal_prio(struct task_struct *p)
1006{
1007 int prio;
1008
1009 if (task_has_rt_policy(p))
1010 prio = MAX_RT_PRIO-1 - p->rt_priority;
1011 else
1012 prio = __normal_prio(p);
1013 return prio;
1014}
1015
1016
1017
1018
1019
1020
1021
1022
1023static int effective_prio(struct task_struct *p)
1024{
1025 p->normal_prio = normal_prio(p);
1026
1027
1028
1029
1030
1031 if (!rt_prio(p->prio))
1032 return p->normal_prio;
1033 return p->prio;
1034}
1035
1036
1037
1038
1039
/* Non-zero when @p is the task currently running on the CPU it is assigned to. */
inline int task_curr(const struct task_struct *p)
{
	return p == cpu_curr(task_cpu(p));
}
1044
1045static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1046 const struct sched_class *prev_class,
1047 int oldprio)
1048{
1049 if (prev_class != p->sched_class) {
1050 if (prev_class->switched_from)
1051 prev_class->switched_from(rq, p);
1052 p->sched_class->switched_to(rq, p);
1053 } else if (oldprio != p->prio)
1054 p->sched_class->prio_changed(rq, p, oldprio);
1055}
1056
1057void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
1058{
1059 const struct sched_class *class;
1060
1061 if (p->sched_class == rq->curr->sched_class) {
1062 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
1063 } else {
1064 for_each_class(class) {
1065 if (class == rq->curr->sched_class)
1066 break;
1067 if (class == p->sched_class) {
1068 resched_task(rq->curr);
1069 break;
1070 }
1071 }
1072 }
1073
1074
1075
1076
1077
1078 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
1079 rq->skip_clock_update = 1;
1080}
1081
1082#ifdef CONFIG_SMP
1083void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1084{
1085#ifdef CONFIG_SCHED_DEBUG
1086
1087
1088
1089
1090 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
1091 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
1092
1093#ifdef CONFIG_LOCKDEP
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104 WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
1105 lockdep_is_held(&task_rq(p)->lock)));
1106#endif
1107#endif
1108
1109 trace_sched_migrate_task(p, new_cpu);
1110
1111 if (task_cpu(p) != new_cpu) {
1112 p->se.nr_migrations++;
1113 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
1114 }
1115
1116 __set_task_cpu(p, new_cpu);
1117}
1118
/* Task / destination-CPU pair; NOTE(review): presumably the argument of migration_cpu_stop(), whose definition is outside this view. */
struct migration_arg {
	struct task_struct *task;
	int dest_cpu;
};

/* Forward declaration; the definition is outside this part of the file. */
static int migration_cpu_stop(void *data);
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142unsigned long wait_task_inactive(struct task_struct *p, long match_state)
1143{
1144 unsigned long flags;
1145 int running, on_rq;
1146 unsigned long ncsw;
1147 struct rq *rq;
1148
1149 for (;;) {
1150
1151
1152
1153
1154
1155
1156 rq = task_rq(p);
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169 while (task_running(rq, p)) {
1170 if (match_state && unlikely(p->state != match_state))
1171 return 0;
1172 cpu_relax();
1173 }
1174
1175
1176
1177
1178
1179
1180 rq = task_rq_lock(p, &flags);
1181 trace_sched_wait_task(p);
1182 running = task_running(rq, p);
1183 on_rq = p->on_rq;
1184 ncsw = 0;
1185 if (!match_state || p->state == match_state)
1186 ncsw = p->nvcsw | LONG_MIN;
1187 task_rq_unlock(rq, p, &flags);
1188
1189
1190
1191
1192 if (unlikely(!ncsw))
1193 break;
1194
1195
1196
1197
1198
1199
1200
1201 if (unlikely(running)) {
1202 cpu_relax();
1203 continue;
1204 }
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215 if (unlikely(on_rq)) {
1216 ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
1217
1218 set_current_state(TASK_UNINTERRUPTIBLE);
1219 schedule_hrtimeout(&to, HRTIMER_MODE_REL);
1220 continue;
1221 }
1222
1223
1224
1225
1226
1227
1228 break;
1229 }
1230
1231 return ncsw;
1232}
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
/*
 * kick_process - send a reschedule IPI to the CPU @p is on, provided that is
 * a different CPU and @p is the task currently running there.  Preemption is
 * disabled around the check.
 */
void kick_process(struct task_struct *p)
{
	int target;

	preempt_disable();
	target = task_cpu(p);
	if (target != smp_processor_id() && task_curr(p))
		smp_send_reschedule(target);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kick_process);
1258#endif
1259
1260#ifdef CONFIG_SMP
1261
1262
1263
1264static int select_fallback_rq(int cpu, struct task_struct *p)
1265{
1266 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
1267 enum { cpuset, possible, fail } state = cpuset;
1268 int dest_cpu;
1269
1270
1271 for_each_cpu(dest_cpu, nodemask) {
1272 if (!cpu_online(dest_cpu))
1273 continue;
1274 if (!cpu_active(dest_cpu))
1275 continue;
1276 if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
1277 return dest_cpu;
1278 }
1279
1280 for (;;) {
1281
1282 for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
1283 if (!cpu_online(dest_cpu))
1284 continue;
1285 if (!cpu_active(dest_cpu))
1286 continue;
1287 goto out;
1288 }
1289
1290 switch (state) {
1291 case cpuset:
1292
1293 cpuset_cpus_allowed_fallback(p);
1294 state = possible;
1295 break;
1296
1297 case possible:
1298 do_set_cpus_allowed(p, cpu_possible_mask);
1299 state = fail;
1300 break;
1301
1302 case fail:
1303 BUG();
1304 break;
1305 }
1306 }
1307
1308out:
1309 if (state != cpuset) {
1310
1311
1312
1313
1314
1315 if (p->mm && printk_ratelimit()) {
1316 printk_sched("process %d (%s) no longer affine to cpu%d\n",
1317 task_pid_nr(p), p->comm, cpu);
1318 }
1319 }
1320
1321 return dest_cpu;
1322}
1323
1324
1325
1326
1327static inline
1328int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
1329{
1330 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342 if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
1343 !cpu_online(cpu)))
1344 cpu = select_fallback_rq(task_cpu(p), p);
1345
1346 return cpu;
1347}
1348
1349static void update_avg(u64 *avg, u64 sample)
1350{
1351 s64 diff = sample - *avg;
1352 *avg += diff >> 3;
1353}
1354#endif
1355
/*
 * ttwu_stat - wake-up statistics; compiles to nothing without
 * CONFIG_SCHEDSTATS.
 *
 * @p:          task that was woken
 * @cpu:        CPU the task was woken on
 * @wake_flags: WF_* flags of the wake-up
 *
 * Counts local vs. remote wake-ups (for remote ones also on the first sched
 * domain of the waking CPU that spans @cpu), migrated and sync wake-ups, and
 * the overall totals on the waking CPU's runqueue and on the task.
 */
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
{
#ifdef CONFIG_SCHEDSTATS
	struct rq *rq = this_rq();

#ifdef CONFIG_SMP
	int this_cpu = smp_processor_id();

	if (cpu != this_cpu) {
		struct sched_domain *sd;

		schedstat_inc(p, se.statistics.nr_wakeups_remote);
		rcu_read_lock();
		for_each_domain(this_cpu, sd) {
			if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
				continue;

			schedstat_inc(sd, ttwu_wake_remote);
			break;
		}
		rcu_read_unlock();
	} else {
		schedstat_inc(rq, ttwu_local);
		schedstat_inc(p, se.statistics.nr_wakeups_local);
	}

	if (wake_flags & WF_MIGRATED)
		schedstat_inc(p, se.statistics.nr_wakeups_migrate);

#endif

	schedstat_inc(rq, ttwu_count);
	schedstat_inc(p, se.statistics.nr_wakeups);

	if (wake_flags & WF_SYNC)
		schedstat_inc(p, se.statistics.nr_wakeups_sync);

#endif
}
1395
1396static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
1397{
1398 activate_task(rq, p, en_flags);
1399 p->on_rq = 1;
1400
1401
1402 if (p->flags & PF_WQ_WORKER)
1403 wq_worker_waking_up(p, cpu_of(rq));
1404}
1405
1406
1407
1408
1409static void
1410ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
1411{
1412 trace_sched_wakeup(p, true);
1413 check_preempt_curr(rq, p, wake_flags);
1414
1415 p->state = TASK_RUNNING;
1416#ifdef CONFIG_SMP
1417 if (p->sched_class->task_woken)
1418 p->sched_class->task_woken(rq, p);
1419
1420 if (rq->idle_stamp) {
1421 u64 delta = rq->clock - rq->idle_stamp;
1422 u64 max = 2*sysctl_sched_migration_cost;
1423
1424 if (delta > max)
1425 rq->avg_idle = max;
1426 else
1427 update_avg(&rq->avg_idle, delta);
1428 rq->idle_stamp = 0;
1429 }
1430#endif
1431}
1432
1433static void
1434ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
1435{
1436#ifdef CONFIG_SMP
1437 if (p->sched_contributes_to_load)
1438 rq->nr_uninterruptible--;
1439#endif
1440
1441 ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
1442 ttwu_do_wakeup(rq, p, wake_flags);
1443}
1444
1445
1446
1447
1448
1449
1450
1451static int ttwu_remote(struct task_struct *p, int wake_flags)
1452{
1453 struct rq *rq;
1454 int ret = 0;
1455
1456 rq = __task_rq_lock(p);
1457 if (p->on_rq) {
1458 ttwu_do_wakeup(rq, p, wake_flags);
1459 ret = 1;
1460 }
1461 __task_rq_unlock(rq);
1462
1463 return ret;
1464}
1465
1466#ifdef CONFIG_SMP
1467static void sched_ttwu_pending(void)
1468{
1469 struct rq *rq = this_rq();
1470 struct llist_node *llist = llist_del_all(&rq->wake_list);
1471 struct task_struct *p;
1472
1473 raw_spin_lock(&rq->lock);
1474
1475 while (llist) {
1476 p = llist_entry(llist, struct task_struct, wake_entry);
1477 llist = llist_next(llist);
1478 ttwu_do_activate(rq, p, 0);
1479 }
1480
1481 raw_spin_unlock(&rq->lock);
1482}
1483
1484void scheduler_ipi(void)
1485{
1486 if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
1487 return;
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502 irq_enter();
1503 sched_ttwu_pending();
1504
1505
1506
1507
1508 if (unlikely(got_nohz_idle_kick() && !need_resched())) {
1509 this_rq()->idle_balance = 1;
1510 raise_softirq_irqoff(SCHED_SOFTIRQ);
1511 }
1512 irq_exit();
1513}
1514
1515static void ttwu_queue_remote(struct task_struct *p, int cpu)
1516{
1517 if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
1518 smp_send_reschedule(cpu);
1519}
1520
1521#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1522static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
1523{
1524 struct rq *rq;
1525 int ret = 0;
1526
1527 rq = __task_rq_lock(p);
1528 if (p->on_cpu) {
1529 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
1530 ttwu_do_wakeup(rq, p, wake_flags);
1531 ret = 1;
1532 }
1533 __task_rq_unlock(rq);
1534
1535 return ret;
1536
1537}
1538#endif
1539
1540bool cpus_share_cache(int this_cpu, int that_cpu)
1541{
1542 return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
1543}
1544#endif
1545
1546static void ttwu_queue(struct task_struct *p, int cpu)
1547{
1548 struct rq *rq = cpu_rq(cpu);
1549
1550#if defined(CONFIG_SMP)
1551 if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
1552 sched_clock_cpu(cpu);
1553 ttwu_queue_remote(p, cpu);
1554 return;
1555 }
1556#endif
1557
1558 raw_spin_lock(&rq->lock);
1559 ttwu_do_activate(rq, p, 0);
1560 raw_spin_unlock(&rq->lock);
1561}
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578static int
1579try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1580{
1581 unsigned long flags;
1582 int cpu, success = 0;
1583
1584 smp_wmb();
1585 raw_spin_lock_irqsave(&p->pi_lock, flags);
1586 if (!(p->state & state))
1587 goto out;
1588
1589 success = 1;
1590 cpu = task_cpu(p);
1591
1592 if (p->on_rq && ttwu_remote(p, wake_flags))
1593 goto stat;
1594
1595#ifdef CONFIG_SMP
1596
1597
1598
1599
1600 while (p->on_cpu) {
1601#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1602
1603
1604
1605
1606
1607
1608
1609 if (ttwu_activate_remote(p, wake_flags))
1610 goto stat;
1611#else
1612 cpu_relax();
1613#endif
1614 }
1615
1616
1617
1618 smp_rmb();
1619
1620 p->sched_contributes_to_load = !!task_contributes_to_load(p);
1621 p->state = TASK_WAKING;
1622
1623 if (p->sched_class->task_waking)
1624 p->sched_class->task_waking(p);
1625
1626 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
1627 if (task_cpu(p) != cpu) {
1628 wake_flags |= WF_MIGRATED;
1629 set_task_cpu(p, cpu);
1630 }
1631#endif
1632
1633 ttwu_queue(p, cpu);
1634stat:
1635 ttwu_stat(p, cpu, wake_flags);
1636out:
1637 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
1638
1639 return success;
1640}
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650static void try_to_wake_up_local(struct task_struct *p)
1651{
1652 struct rq *rq = task_rq(p);
1653
1654 BUG_ON(rq != this_rq());
1655 BUG_ON(p == current);
1656 lockdep_assert_held(&rq->lock);
1657
1658 if (!raw_spin_trylock(&p->pi_lock)) {
1659 raw_spin_unlock(&rq->lock);
1660 raw_spin_lock(&p->pi_lock);
1661 raw_spin_lock(&rq->lock);
1662 }
1663
1664 if (!(p->state & TASK_NORMAL))
1665 goto out;
1666
1667 if (!p->on_rq)
1668 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
1669
1670 ttwu_do_wakeup(rq, p, 0);
1671 ttwu_stat(p, smp_processor_id(), 0);
1672out:
1673 raw_spin_unlock(&p->pi_lock);
1674}
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687int wake_up_process(struct task_struct *p)
1688{
1689 return try_to_wake_up(p, TASK_ALL, 0);
1690}
1691EXPORT_SYMBOL(wake_up_process);
1692
/* wake_up_state - wake @p only if its state is within the @state mask; no wake flags. */
int wake_up_state(struct task_struct *p, unsigned int state)
{
	int woken = try_to_wake_up(p, state, 0);

	return woken;
}
1697
1698
1699
1700
1701
1702
1703
/*
 * Reset the scheduler-private fields of task @p to their initial values:
 * clear the on-runqueue markers and the sched_entity runtime accounting,
 * and initialise the embedded list heads.
 */
static void __sched_fork(struct task_struct *p)
{
	p->on_rq			= 0;

	/* Start the sched_entity with clean accounting. */
	p->se.on_rq			= 0;
	p->se.exec_start		= 0;
	p->se.sum_exec_runtime		= 0;
	p->se.prev_sum_exec_runtime	= 0;
	p->se.nr_migrations		= 0;
	p->se.vruntime			= 0;
	INIT_LIST_HEAD(&p->se.group_node);

#ifdef CONFIG_SCHEDSTATS
	/* Zero the per-entity statistics block. */
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif

	INIT_LIST_HEAD(&p->rt.run_list);

#ifdef CONFIG_PREEMPT_NOTIFIERS
	/* The task starts with an empty preempt-notifier list. */
	INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
}
1726
1727
1728
1729
/*
 * sched_fork - scheduler setup for task @p
 *
 * Resets @p's scheduler state, derives its priority and scheduling class,
 * and assigns it to the CPU this function is running on.  The whole body
 * runs between get_cpu() and put_cpu().
 */
void sched_fork(struct task_struct *p)
{
	unsigned long flags;
	int cpu = get_cpu();

	__sched_fork(p);

	/* Mark the task TASK_RUNNING; it is not placed on a runqueue here. */
	p->state = TASK_RUNNING;

	/* Start from the normal (unboosted) priority of the current task. */
	p->prio = current->normal_prio;

	/*
	 * If reset-on-fork was requested, drop real-time policies to
	 * SCHED_NORMAL and raise negative nice values back to nice 0.
	 */
	if (unlikely(p->sched_reset_on_fork)) {
		if (task_has_rt_policy(p)) {
			p->policy = SCHED_NORMAL;
			p->static_prio = NICE_TO_PRIO(0);
			p->rt_priority = 0;
		} else if (PRIO_TO_NICE(p->static_prio) < 0)
			p->static_prio = NICE_TO_PRIO(0);

		p->prio = p->normal_prio = __normal_prio(p);
		set_load_weight(p);

		/* The reset is one-shot: clear the request flag. */
		p->sched_reset_on_fork = 0;
	}

	/* Tasks without a real-time priority use the fair class. */
	if (!rt_prio(p->prio))
		p->sched_class = &fair_sched_class;

	if (p->sched_class->task_fork)
		p->sched_class->task_fork(p);

	/* Assign the task to this CPU; set_task_cpu() is done under pi_lock. */
	raw_spin_lock_irqsave(&p->pi_lock, flags);
	set_task_cpu(p, cpu);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
	if (likely(sched_info_on()))
		memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP)
	p->on_cpu = 0;
#endif
#ifdef CONFIG_PREEMPT_COUNT
	/* NOTE(review): presumably the task starts with preemption disabled - confirm. */
	task_thread_info(p)->preempt_count = 1;
#endif
#ifdef CONFIG_SMP
	plist_node_init(&p->pushable_tasks, MAX_PRIO);
#endif

	put_cpu();
}
1803
1804
1805
1806
1807
1808
1809
1810
/*
 * wake_up_new_task - put task @p on a runqueue for the first time
 *
 * Selects a runqueue for @p (on SMP), activates the task there and lets the
 * scheduling class decide whether it should preempt the running task.
 */
void wake_up_new_task(struct task_struct *p)
{
	unsigned long flags;
	struct rq *rq;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_SMP
	/* Choose the target CPU using the fork-balancing flag. */
	set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
#endif

	/* pi_lock is already held; __task_rq_lock() adds the runqueue lock. */
	rq = __task_rq_lock(p);
	activate_task(rq, p, 0);
	p->on_rq = 1;
	trace_sched_wakeup_new(p, true);
	check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
	if (p->sched_class->task_woken)
		p->sched_class->task_woken(rq, p);
#endif
	/* Drops the runqueue lock and pi_lock, restoring the saved irq flags. */
	task_rq_unlock(rq, p, &flags);
}
1837
1838#ifdef CONFIG_PREEMPT_NOTIFIERS
1839
1840
1841
1842
1843
1844void preempt_notifier_register(struct preempt_notifier *notifier)
1845{
1846 hlist_add_head(¬ifier->link, ¤t->preempt_notifiers);
1847}
1848EXPORT_SYMBOL_GPL(preempt_notifier_register);
1849
1850
1851
1852
1853
1854
1855
1856void preempt_notifier_unregister(struct preempt_notifier *notifier)
1857{
1858 hlist_del(¬ifier->link);
1859}
1860EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
1861
/*
 * Invoke the sched_in() callback of every preempt notifier registered on
 * @curr, passing the id of the CPU this code is running on.
 */
static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
{
	struct preempt_notifier *notifier;
	struct hlist_node *node;

	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
		notifier->ops->sched_in(notifier, raw_smp_processor_id());
}
1870
/*
 * Invoke the sched_out() callback of every preempt notifier registered on
 * @curr, passing @next to each callback.
 */
static void
fire_sched_out_preempt_notifiers(struct task_struct *curr,
				 struct task_struct *next)
{
	struct preempt_notifier *notifier;
	struct hlist_node *node;

	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
		notifier->ops->sched_out(notifier, next);
}
1881
1882#else
1883
/* !CONFIG_PREEMPT_NOTIFIERS: no notifiers exist, so this is a no-op. */
static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
{
}
1887
/* !CONFIG_PREEMPT_NOTIFIERS: no notifiers exist, so this is a no-op. */
static void
fire_sched_out_preempt_notifiers(struct task_struct *curr,
				 struct task_struct *next)
{
}
1893
1894#endif
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
/*
 * prepare_task_switch - bookkeeping done before switching tasks
 * @rq: the runqueue on which the switch happens
 * @prev: the task being switched away from
 * @next: the task being switched to
 *
 * Emits the sched_switch tracepoint, updates sched_info and perf state,
 * fires @prev's sched-out preempt notifiers, and then runs the lock-switch
 * and architecture prepare hooks for @next.
 */
static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
		    struct task_struct *next)
{
	trace_sched_switch(prev, next);
	sched_info_switch(prev, next);
	perf_event_task_sched_out(prev, next);
	fire_sched_out_preempt_notifiers(prev, next);
	prepare_lock_switch(rq, next);
	prepare_arch_switch(next);
}
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
/*
 * finish_task_switch - clean up after a task switch
 * @rq: runqueue the switch happened on
 * @prev: the task that was just switched away from
 *
 * Runs the architecture and lock-switch completion hooks, fires the sched-in
 * notifiers for current, drops the mm reference stashed in rq->prev_mm (if
 * any) and releases the task_struct reference of @prev when it is dead.
 * Annotated as releasing rq->lock.
 */
static void finish_task_switch(struct rq *rq, struct task_struct *prev)
	__releases(rq->lock)
{
	struct mm_struct *mm = rq->prev_mm;
	long prev_state;

	rq->prev_mm = NULL;

	/*
	 * Sample prev->state before finish_lock_switch().
	 * NOTE(review): presumably @prev may change state (e.g. run on
	 * another CPU) once the lock switch is finished - confirm.
	 */
	prev_state = prev->state;
	finish_arch_switch(prev);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
	/* On such architectures the perf sched-in hook runs with irqs off. */
	perf_event_task_sched_in(prev, current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
	finish_lock_switch(rq, prev);
	finish_arch_post_lock_switch();

	fire_sched_in_preempt_notifiers(current);
	/* Drop the mm reference handed over via rq->prev_mm, if there is one. */
	if (mm)
		mmdrop(mm);
	if (unlikely(prev_state == TASK_DEAD)) {
		/* @prev is dead: flush its kprobe state and drop its task ref. */
		kprobe_flush_task(prev);
		put_task_struct(prev);
	}
}
1979
1980#ifdef CONFIG_SMP
1981
1982
/* Call the pre_schedule() hook of @prev's scheduling class, if it has one. */
static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
{
	if (prev->sched_class->pre_schedule)
		prev->sched_class->pre_schedule(rq, prev);
}
1988
1989
1990static inline void post_schedule(struct rq *rq)
1991{
1992 if (rq->post_schedule) {
1993 unsigned long flags;
1994
1995 raw_spin_lock_irqsave(&rq->lock, flags);
1996 if (rq->curr->sched_class->post_schedule)
1997 rq->curr->sched_class->post_schedule(rq);
1998 raw_spin_unlock_irqrestore(&rq->lock, flags);
1999
2000 rq->post_schedule = 0;
2001 }
2002}
2003
2004#else
2005
/* !CONFIG_SMP: there is no pre-schedule hook to run. */
static inline void pre_schedule(struct rq *rq, struct task_struct *p)
{
}
2009
/* !CONFIG_SMP: there is no post-schedule hook to run. */
static inline void post_schedule(struct rq *rq)
{
}
2013
2014#endif
2015
2016
2017
2018
2019
/*
 * schedule_tail - finish a context switch on behalf of the current task
 * @prev: the task that was switched away from
 *
 * Completes the task switch on this CPU's runqueue, runs any post-schedule
 * work, and writes the current task's pid to its set_child_tid user address
 * if one is set.
 */
asmlinkage void schedule_tail(struct task_struct *prev)
	__releases(rq->lock)
{
	struct rq *rq = this_rq();

	finish_task_switch(rq, prev);

	post_schedule(rq);

#ifdef __ARCH_WANT_UNLOCKED_CTXSW
	/* NOTE(review): presumably balances a preempt count taken elsewhere on these architectures - confirm. */
	preempt_enable();
#endif
	/* The put_user() result is not checked. */
	if (current->set_child_tid)
		put_user(task_pid_vnr(current), current->set_child_tid);
}
2040
2041
2042
2043
2044
/*
 * context_switch - switch to the new mm and the new task's register state
 * @rq: the runqueue the switch happens on
 * @prev: the task being switched out
 * @next: the task being switched in
 */
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
	       struct task_struct *next)
{
	struct mm_struct *mm, *oldmm;

	prepare_task_switch(rq, prev, next);

	mm = next->mm;
	oldmm = prev->active_mm;

	arch_start_context_switch(prev);

	if (!mm) {
		/*
		 * @next has no mm of its own: it borrows @prev's active mm,
		 * takes an mm_count reference and enters lazy-TLB mode.
		 */
		next->active_mm = oldmm;
		atomic_inc(&oldmm->mm_count);
		enter_lazy_tlb(oldmm, next);
	} else
		switch_mm(oldmm, mm, next);

	if (!prev->mm) {
		/*
		 * @prev was itself borrowing an mm: hand the reference to
		 * rq->prev_mm, which finish_task_switch() drops via mmdrop().
		 */
		prev->active_mm = NULL;
		rq->prev_mm = oldmm;
	}

#ifndef __ARCH_WANT_UNLOCKED_CTXSW
	/* Tell lockdep rq->lock is released; the lock word itself is untouched here. */
	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#endif

	/* Switch the register state and the stack. */
	switch_to(prev, next, prev);

	barrier();

	/*
	 * The runqueue is looked up again via this_rq() instead of reusing
	 * @rq. NOTE(review): presumably because this task may resume on a
	 * different CPU - confirm.
	 */
	finish_task_switch(this_rq(), prev);
}
2094
2095
2096
2097
2098
2099
2100
2101
2102unsigned long nr_running(void)
2103{
2104 unsigned long i, sum = 0;
2105
2106 for_each_online_cpu(i)
2107 sum += cpu_rq(i)->nr_running;
2108
2109 return sum;
2110}
2111
2112unsigned long nr_uninterruptible(void)
2113{
2114 unsigned long i, sum = 0;
2115
2116 for_each_possible_cpu(i)
2117 sum += cpu_rq(i)->nr_uninterruptible;
2118
2119
2120
2121
2122
2123 if (unlikely((long)sum < 0))
2124 sum = 0;
2125
2126 return sum;
2127}
2128
2129unsigned long long nr_context_switches(void)
2130{
2131 int i;
2132 unsigned long long sum = 0;
2133
2134 for_each_possible_cpu(i)
2135 sum += cpu_rq(i)->nr_switches;
2136
2137 return sum;
2138}
2139
2140unsigned long nr_iowait(void)
2141{
2142 unsigned long i, sum = 0;
2143
2144 for_each_possible_cpu(i)
2145 sum += atomic_read(&cpu_rq(i)->nr_iowait);
2146
2147 return sum;
2148}
2149
2150unsigned long nr_iowait_cpu(int cpu)
2151{
2152 struct rq *this = cpu_rq(cpu);
2153 return atomic_read(&this->nr_iowait);
2154}
2155
2156unsigned long this_cpu_load(void)
2157{
2158 struct rq *this = this_rq();
2159 return this->cpu_load[0];
2160}
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
/* Global count of active tasks, built from the per-runqueue deltas folded in below. */
static atomic_long_t calc_load_tasks;
/* Jiffies value at which the next global load-average update is due. */
static unsigned long calc_load_update;
/* The three load averages (EXP_1, EXP_5, EXP_15), in FSHIFT fixed-point. */
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun);
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
2225{
2226 loads[0] = (avenrun[0] + offset) << shift;
2227 loads[1] = (avenrun[1] + offset) << shift;
2228 loads[2] = (avenrun[2] + offset) << shift;
2229}
2230
2231static long calc_load_fold_active(struct rq *this_rq)
2232{
2233 long nr_active, delta = 0;
2234
2235 nr_active = this_rq->nr_running;
2236 nr_active += (long) this_rq->nr_uninterruptible;
2237
2238 if (nr_active != this_rq->calc_load_active) {
2239 delta = nr_active - this_rq->calc_load_active;
2240 this_rq->calc_load_active = nr_active;
2241 }
2242
2243 return delta;
2244}
2245
2246
2247
2248
2249static unsigned long
2250calc_load(unsigned long load, unsigned long exp, unsigned long active)
2251{
2252 load *= exp;
2253 load += active * (FIXED_1 - exp);
2254 load += 1UL << (FSHIFT - 1);
2255 return load >> FSHIFT;
2256}
2257
2258#ifdef CONFIG_NO_HZ
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
/* Two slots of pending idle deltas; the slot in use is picked by calc_load_idx & 1. */
static atomic_long_t calc_load_idle[2];
/* Index counter; incremented at the end of calc_global_nohz(). */
static int calc_load_idx;
2303
/*
 * Return the calc_load_idle[] slot (0 or 1) that new idle deltas should be
 * added to.
 */
static inline int calc_load_write_idx(void)
{
	int idx = calc_load_idx;

	/*
	 * Read calc_load_idx before reading calc_load_update below.
	 * NOTE(review): looks like this pairs with the smp_wmb() in
	 * calc_global_nohz() - confirm.
	 */
	smp_rmb();

	/*
	 * Once jiffies has reached calc_load_update, write to the other
	 * slot instead.
	 */
	if (!time_before(jiffies, calc_load_update))
		idx++;

	return idx & 1;
}
2323
/* Return the calc_load_idle[] slot (0 or 1) that is currently read from. */
static inline int calc_load_read_idx(void)
{
	return calc_load_idx & 1;
}
2328
2329void calc_load_enter_idle(void)
2330{
2331 struct rq *this_rq = this_rq();
2332 long delta;
2333
2334
2335
2336
2337
2338 delta = calc_load_fold_active(this_rq);
2339 if (delta) {
2340 int idx = calc_load_write_idx();
2341 atomic_long_add(delta, &calc_load_idle[idx]);
2342 }
2343}
2344
/*
 * calc_load_exit_idle - resynchronise this runqueue's calc_load_update
 * deadline with the global one.
 */
void calc_load_exit_idle(void)
{
	struct rq *this_rq = this_rq();

	/* The per-runqueue deadline has not passed yet: nothing to fix up. */
	if (time_before(jiffies, this_rq->calc_load_update))
		return;

	/*
	 * Adopt the global deadline.  If jiffies is still within 10 ticks
	 * after it, push this runqueue's deadline one LOAD_FREQ period on.
	 */
	this_rq->calc_load_update = calc_load_update;
	if (time_before(jiffies, this_rq->calc_load_update + 10))
		this_rq->calc_load_update += LOAD_FREQ;
}
2364
2365static long calc_load_fold_idle(void)
2366{
2367 int idx = calc_load_read_idx();
2368 long delta = 0;
2369
2370 if (atomic_long_read(&calc_load_idle[idx]))
2371 delta = atomic_long_xchg(&calc_load_idle[idx], 0);
2372
2373 return delta;
2374}
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
/**
 * fixed_power_int - compute x^n in fixed-point, by binary exponentiation
 * @x:		base, as an unsigned fixed-point number
 * @frac_bits:	number of fractional bits in @x and in the result
 * @n:		non-negative integer exponent
 *
 * Every multiplication is rounded to nearest by adding half an LSB before
 * shifting back down.  Returns 1.0 (1 << @frac_bits) when @n is 0.
 */
static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
{
	unsigned long acc = 1UL << frac_bits;

	while (n) {
		/* Multiply the set bits of the exponent into the result. */
		if (n & 1) {
			acc *= x;
			acc += 1UL << (frac_bits - 1);
			acc >>= frac_bits;
		}

		n >>= 1;

		/* Square the base only if another exponent bit remains. */
		if (n) {
			x *= x;
			x += 1UL << (frac_bits - 1);
			x >>= frac_bits;
		}
	}

	return acc;
}
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
/*
 * calc_load_n - apply @n load-average periods in one step
 *
 * Raises the decay factor @exp to the @n-th power in FSHIFT fixed-point and
 * then performs a single calc_load() step with the resulting factor.
 */
static unsigned long
calc_load_n(unsigned long load, unsigned long exp,
	    unsigned long active, unsigned int n)
{

	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
/*
 * calc_global_nohz - catch up on load-average periods that were missed
 *
 * Called at the end of calc_global_load().  If the (already advanced)
 * update deadline plus 10 ticks has passed again, fold the missed periods
 * into avenrun[] in one go.  Always advances calc_load_idx.
 */
static void calc_global_nohz(void)
{
	long delta, active, n;

	if (!time_before(jiffies, calc_load_update + 10)) {
		/* Number of whole LOAD_FREQ periods that were missed. */
		delta = jiffies - calc_load_update - 10;
		n = 1 + (delta / LOAD_FREQ);

		/* A negative task count is treated as zero activity. */
		active = atomic_long_read(&calc_load_tasks);
		active = active > 0 ? active * FIXED_1 : 0;

		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);

		calc_load_update += n * LOAD_FREQ;
	}

	/*
	 * Make the calc_load_update store visible before the index flips;
	 * calc_load_write_idx() reads the two in the opposite order with an
	 * smp_rmb() in between.
	 */
	smp_wmb();
	calc_load_idx++;
}
2484#else
2485
/* !CONFIG_NO_HZ: there are no idle deltas to fold and nothing to catch up on. */
static inline long calc_load_fold_idle(void) { return 0; }
static inline void calc_global_nohz(void) { }
2488
2489#endif
2490
2491
2492
2493
2494
/*
 * calc_global_load - update the avenrun[] load averages
 * @ticks: not used by this function
 *
 * Does nothing until 10 ticks past calc_load_update.  Then folds pending
 * idle deltas into calc_load_tasks, performs one averaging step for each of
 * the three averages and advances the deadline by LOAD_FREQ.
 */
void calc_global_load(unsigned long ticks)
{
	long active, delta;

	if (time_before(jiffies, calc_load_update + 10))
		return;

	/* Collect the deltas accumulated by CPUs that went idle. */
	delta = calc_load_fold_idle();
	if (delta)
		atomic_long_add(delta, &calc_load_tasks);

	/* A negative task count is treated as zero activity. */
	active = atomic_long_read(&calc_load_tasks);
	active = active > 0 ? active * FIXED_1 : 0;

	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
	avenrun[2] = calc_load(avenrun[2], EXP_15, active);

	calc_load_update += LOAD_FREQ;

	/* Handle further missed periods (a no-op stub without CONFIG_NO_HZ). */
	calc_global_nohz();
}
2523
2524
2525
2526
2527
2528static void calc_load_account_active(struct rq *this_rq)
2529{
2530 long delta;
2531
2532 if (time_before(jiffies, this_rq->calc_load_update))
2533 return;
2534
2535 delta = calc_load_fold_active(this_rq);
2536 if (delta)
2537 atomic_long_add(delta, &calc_load_tasks);
2538
2539 this_rq->calc_load_update += LOAD_FREQ;
2540}
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
/* Decay factors below are fixed-point fractions scaled by 2^DEGRADE_SHIFT. */
#define DEGRADE_SHIFT		7
/*
 * Per load index: number of missed updates at or above which
 * decay_load_missed() returns 0 for that index.
 */
static const unsigned char
		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
/*
 * degrade_factor[idx][j]: factor applied by decay_load_missed() when bit j
 * of the missed-update count is set.  Entries omitted from a row default
 * to 0.
 */
static const unsigned char
		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
					{0, 0, 0, 0, 0, 0, 0, 0},
					{64, 32, 8, 0, 0, 0, 0, 0},
					{96, 72, 40, 12, 1, 0, 0},
					{112, 98, 75, 43, 15, 1, 0},
					{120, 112, 98, 76, 45, 16, 2} };
2583
2584
2585
2586
2587
2588
2589static unsigned long
2590decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
2591{
2592 int j = 0;
2593
2594 if (!missed_updates)
2595 return load;
2596
2597 if (missed_updates >= degrade_zero_ticks[idx])
2598 return 0;
2599
2600 if (idx == 1)
2601 return load >> missed_updates;
2602
2603 while (missed_updates) {
2604 if (missed_updates % 2)
2605 load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
2606
2607 missed_updates >>= 1;
2608 j++;
2609 }
2610 return load;
2611}
2612
2613
2614
2615
2616
2617
/*
 * __update_cpu_load - refresh the cpu_load[] history of @this_rq
 * @this_rq:		the runqueue to update
 * @this_load:		the load sample to feed in
 * @pending_updates:	number of updates covered by this call
 *
 * cpu_load[0] is set to the raw sample.  Each higher index i is a slower
 * average: (old * (2^i - 1) + new) / 2^i, where old has first been decayed
 * for the pending_updates - 1 updates that were missed.
 */
static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
			      unsigned long pending_updates)
{
	int i, scale;

	this_rq->nr_load_updates++;

	/* Index 0 tracks the sample directly; scale doubles with each index. */
	this_rq->cpu_load[0] = this_load;
	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
		unsigned long old_load, new_load;

		/* Decay the old value for the updates that were skipped. */
		old_load = this_rq->cpu_load[i];
		old_load = decay_load_missed(old_load, pending_updates - 1, i);
		new_load = this_load;

		/*
		 * Round up when the load is rising, so the shift below does
		 * not truncate the increase away.
		 */
		if (new_load > old_load)
			new_load += scale - 1;

		this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
	}

	sched_avg_update(this_rq);
}
2648
2649#ifdef CONFIG_NO_HZ
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
/*
 * update_idle_cpu_load - bring cpu_load[] of an unloaded runqueue up to date
 *
 * Only acts when the runqueue's load weight is zero and at least one jiffy
 * has passed since last_load_update_tick.  No lock is taken here.
 * NOTE(review): presumably callers hold this_rq->lock - confirm.
 */
void update_idle_cpu_load(struct rq *this_rq)
{
	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
	unsigned long load = this_rq->load.weight;
	unsigned long pending_updates;

	/* Bail out if the runqueue has load or was already updated this jiffy. */
	if (load || curr_jiffies == this_rq->last_load_update_tick)
		return;

	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
	this_rq->last_load_update_tick = curr_jiffies;

	/* load is known to be zero at this point. */
	__update_cpu_load(this_rq, load, pending_updates);
}
2684
2685
2686
2687
/*
 * update_cpu_load_nohz - account this CPU's missed load updates as zero load
 *
 * Returns early when last_load_update_tick already equals the current
 * jiffies; otherwise feeds a zero-load sample covering the elapsed jiffies
 * into __update_cpu_load() under the runqueue lock.
 */
void update_cpu_load_nohz(void)
{
	struct rq *this_rq = this_rq();
	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
	unsigned long pending_updates;

	/* Unlocked fast path: nothing elapsed since the last update. */
	if (curr_jiffies == this_rq->last_load_update_tick)
		return;

	raw_spin_lock(&this_rq->lock);
	/* Recompute under the lock; the tick may have been updated meanwhile. */
	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
	if (pending_updates) {
		this_rq->last_load_update_tick = curr_jiffies;
		/* The missed period is accounted with a load of 0. */
		__update_cpu_load(this_rq, 0, pending_updates);
	}
	raw_spin_unlock(&this_rq->lock);
}
2709#endif
2710
2711
2712
2713
/*
 * Tick-time load update: record the current jiffies as the last update tick,
 * feed the runqueue's load weight into cpu_load[] as a single pending
 * update, and run the load-average accounting for this runqueue.
 */
static void update_cpu_load_active(struct rq *this_rq)
{
	this_rq->last_load_update_tick = jiffies;
	__update_cpu_load(this_rq, this_rq->load.weight, 1);

	calc_load_account_active(this_rq);
}
2724
2725#ifdef CONFIG_SMP
2726
2727
2728
2729
2730
/*
 * sched_exec - ask the scheduling class for a CPU using SD_BALANCE_EXEC and
 * migrate the current task there if it differs from the CPU we are on.
 */
void sched_exec(void)
{
	struct task_struct *p = current;
	unsigned long flags;
	int dest_cpu;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
	/* Already on the chosen CPU: nothing to do. */
	if (dest_cpu == smp_processor_id())
		goto unlock;

	if (likely(cpu_active(dest_cpu))) {
		struct migration_arg arg = { p, dest_cpu };

		/* Drop pi_lock before handing the migration to the stopper. */
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
		return;
	}
unlock:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}
2752
2753#endif
2754
/* Per-CPU kernel statistics and per-CPU cputime counters, both exported. */
DEFINE_PER_CPU(struct kernel_stat, kstat);
DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);

EXPORT_PER_CPU_SYMBOL(kstat);
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
2760
2761
2762
2763
2764
2765
2766
2767static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
2768{
2769 u64 ns = 0;
2770
2771 if (task_current(rq, p)) {
2772 update_rq_clock(rq);
2773 ns = rq->clock_task - p->se.exec_start;
2774 if ((s64)ns < 0)
2775 ns = 0;
2776 }
2777
2778 return ns;
2779}
2780
2781unsigned long long task_delta_exec(struct task_struct *p)
2782{
2783 unsigned long flags;
2784 struct rq *rq;
2785 u64 ns = 0;
2786
2787 rq = task_rq_lock(p, &flags);
2788 ns = do_task_delta_exec(p, rq);
2789 task_rq_unlock(rq, p, &flags);
2790
2791 return ns;
2792}
2793
2794
2795
2796
2797
2798
2799unsigned long long task_sched_runtime(struct task_struct *p)
2800{
2801 unsigned long flags;
2802 struct rq *rq;
2803 u64 ns = 0;
2804
2805 rq = task_rq_lock(p, &flags);
2806 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
2807 task_rq_unlock(rq, p, &flags);
2808
2809 return ns;
2810}
2811
#ifdef CONFIG_CGROUP_CPUACCT
/* cpuacct cgroup subsystem object and its root accounting group. */
struct cgroup_subsys cpuacct_subsys;
struct cpuacct root_cpuacct;
#endif
2816
/*
 * task_group_account_field - add @tmp to cpustat field @index
 * @p:		task the time is charged to
 * @index:	cpustat field (a CPUTIME_* index)
 * @tmp:	amount to add
 *
 * Always charges this CPU's global kernel_cpustat.  With cpuacct active,
 * additionally charges every cpuacct group from @p's group upward,
 * stopping before root_cpuacct.
 */
static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
	struct kernel_cpustat *kcpustat;
	struct cpuacct *ca;
#endif
	/*
	 * The global per-cpu counter is updated directly here, so the walk
	 * below can stop before the root group.
	 */
	__get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
	if (unlikely(!cpuacct_subsys.active))
		return;

	/* Walk the cpuacct hierarchy under RCU. */
	rcu_read_lock();
	ca = task_ca(p);
	while (ca && (ca != &root_cpuacct)) {
		kcpustat = this_cpu_ptr(ca->cpustat);
		kcpustat->cpustat[index] += tmp;
		ca = parent_ca(ca);
	}
	rcu_read_unlock();
#endif
}
2846
2847
2848
2849
2850
2851
2852
2853
2854void account_user_time(struct task_struct *p, cputime_t cputime,
2855 cputime_t cputime_scaled)
2856{
2857 int index;
2858
2859
2860 p->utime += cputime;
2861 p->utimescaled += cputime_scaled;
2862 account_group_user_time(p, cputime);
2863
2864 index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
2865
2866
2867 task_group_account_field(p, index, (__force u64) cputime);
2868
2869
2870 acct_update_integrals(p);
2871}
2872
2873
2874
2875
2876
2877
2878
/*
 * account_guest_time - charge guest cpu time to a task
 * @p:			the task the time is charged to
 * @cputime:		the cpu time spent running a guest
 * @cputime_scaled:	the same time, scaled
 *
 * Guest time is counted as user time too, both in the task and in the
 * per-cpu statistics.
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
			       cputime_t cputime_scaled)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Per-task totals: guest time is added to utime and to gtime. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Per-cpu totals, split by whether the task has a positive nice value. */
	if (TASK_NICE(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}
2899
2900
2901
2902
2903
2904
2905
2906
/*
 * __account_system_time - charge system cpu time to a task and a given
 * cpustat field
 * @p:			the task the time is charged to
 * @cputime:		the cpu time spent in kernel space
 * @cputime_scaled:	the same time, scaled
 * @index:		the cpustat field (CPUTIME_*) to charge
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
			cputime_t cputime_scaled, int index)
{
	/* Per-task and per-thread-group totals. */
	p->stime += cputime;
	p->stimescaled += cputime_scaled;
	account_group_system_time(p, cputime);

	/* Per-cpu (and cpuacct) statistics. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Extended accounting fields. */
	acct_update_integrals(p);
}
2922
2923
2924
2925
2926
2927
2928
2929
/*
 * account_system_time - charge system cpu time to a task
 * @p:			the task the time is charged to
 * @hardirq_offset:	amount subtracted from the hardirq/irq counts before
 *			they are tested
 * @cputime:		the cpu time spent in kernel space
 * @cputime_scaled:	the same time, scaled
 *
 * Classifies the time as guest, hardirq, softirq or plain system time.
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
			 cputime_t cputime, cputime_t cputime_scaled)
{
	int index;

	/* A PF_VCPU task outside any irq context is accounted as guest time. */
	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime, cputime_scaled);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	__account_system_time(p, cputime, cputime_scaled, index);
}
2949
2950
2951
2952
2953
2954void account_steal_time(cputime_t cputime)
2955{
2956 u64 *cpustat = kcpustat_this_cpu->cpustat;
2957
2958 cpustat[CPUTIME_STEAL] += (__force u64) cputime;
2959}
2960
2961
2962
2963
2964
2965void account_idle_time(cputime_t cputime)
2966{
2967 u64 *cpustat = kcpustat_this_cpu->cpustat;
2968 struct rq *rq = this_rq();
2969
2970 if (atomic_read(&rq->nr_iowait) > 0)
2971 cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
2972 else
2973 cpustat[CPUTIME_IDLE] += (__force u64) cputime;
2974}
2975
2976static __always_inline bool steal_account_process_tick(void)
2977{
2978#ifdef CONFIG_PARAVIRT
2979 if (static_key_false(¶virt_steal_enabled)) {
2980 u64 steal, st = 0;
2981
2982 steal = paravirt_steal_clock(smp_processor_id());
2983 steal -= this_rq()->prev_steal_time;
2984
2985 st = steal_ticks(steal);
2986 this_rq()->prev_steal_time += st * TICK_NSEC;
2987
2988 account_steal_time(st);
2989 return st;
2990 }
2991#endif
2992 return false;
2993}
2994
2995#ifndef CONFIG_VIRT_CPU_ACCOUNTING
2996
2997#ifdef CONFIG_IRQ_TIME_ACCOUNTING
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
/*
 * irqtime_account_process_tick - account one tick (CONFIG_IRQ_TIME_ACCOUNTING)
 * @p:		the task that was running when the tick hit
 * @user_tick:	non-zero if the tick interrupted user space
 * @rq:		this CPU's runqueue
 *
 * The tick is charged to exactly one bucket, tested in this order: steal,
 * hardirq, softirq, ksoftirqd (as softirq), user, idle, guest, system.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Stolen time consumes the tick entirely. */
	if (steal_account_process_tick())
		return;

	if (irqtime_account_hi_update()) {
		cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
	} else if (irqtime_account_si_update()) {
		cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
	} else if (this_cpu_ksoftirqd() == p) {
		/*
		 * The tick hit this CPU's ksoftirqd thread: charge it as
		 * softirq time, including the task's own system time.
		 */
		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
					CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	} else if (p == rq->idle) {
		account_idle_time(cputime_one_jiffy);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
	} else {
		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
					CPUTIME_SYSTEM);
	}
}
3051
3052static void irqtime_account_idle_ticks(int ticks)
3053{
3054 int i;
3055 struct rq *rq = this_rq();
3056
3057 for (i = 0; i < ticks; i++)
3058 irqtime_account_process_tick(current, 0, rq);
3059}
3060#else
/* !CONFIG_IRQ_TIME_ACCOUNTING: irq-time aware accounting is compiled out. */
static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq) {}
3064#endif
3065
3066
3067
3068
3069
3070
/*
 * account_process_tick - account one tick of cpu time
 * @p:		the task that was running when the tick hit
 * @user_tick:	non-zero if the tick interrupted user space
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	struct rq *rq = this_rq();

	/* With sched_clock_irqtime set, use the irq-time aware path. */
	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq);
		return;
	}

	/* Stolen time consumes the tick entirely. */
	if (steal_account_process_tick())
		return;

	/*
	 * A tick is idle time only if it hit the idle task with irq_count()
	 * equal to HARDIRQ_OFFSET; every other non-user tick is system time.
	 */
	if (user_tick)
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
				    one_jiffy_scaled);
	else
		account_idle_time(cputime_one_jiffy);
}
3092
3093
3094
3095
3096
3097
/*
 * account_steal_ticks - account @ticks ticks as steal time, after
 * converting them to cputime with jiffies_to_cputime().
 */
void account_steal_ticks(unsigned long ticks)
{
	account_steal_time(jiffies_to_cputime(ticks));
}
3102
3103
3104
3105
3106
3107void account_idle_ticks(unsigned long ticks)
3108{
3109
3110 if (sched_clock_irqtime) {
3111 irqtime_account_idle_ticks(ticks);
3112 return;
3113 }
3114
3115 account_idle_time(jiffies_to_cputime(ticks));
3116}
3117
3118#endif
3119
3120
3121
3122
3123#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
 * task_times - report @p's user and system time (CONFIG_VIRT_CPU_ACCOUNTING)
 *
 * The task's utime and stime are returned as they are, without scaling.
 */
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}
3129
/*
 * thread_group_times - report the user and system time of @p's thread group
 * (CONFIG_VIRT_CPU_ACCOUNTING)
 *
 * Returns the totals computed by thread_group_cputime(), without scaling.
 */
void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}
3139#else
3140
/* Fallback used when nsecs_to_cputime is not already defined: convert via jiffies. */
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
#endif
3144
/*
 * scale_utime - compute rtime * utime / total
 *
 * The product is formed in 64 bits.  The divisor @total is cast to u32 when
 * cputime_t is 4 bytes wide and to u64 otherwise.  Callers in this file only
 * call this with a non-zero @total.
 * NOTE(review): the 64-bit product rtime * utime is not checked for
 * overflow - confirm the value ranges make that safe.
 */
static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
	u64 temp = (__force u64) rtime;

	temp *= (__force u64) utime;

	if (sizeof(cputime_t) == 4)
		temp = div_u64(temp, (__force u32) total);
	else
		temp = div64_u64(temp, (__force u64) total);

	return (__force cputime_t) temp;
}
3158
/*
 * task_times - report @p's user and system time, scaled to its runtime
 *
 * The scheduler's sum_exec_runtime is used as the total run time and split
 * in the ratio utime : (utime + stime).  The reported values are kept in
 * prev_utime/prev_stime and never decrease between calls.
 */
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, utime = p->utime, total = utime + p->stime;

	/* Total run time as measured by the scheduler. */
	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);

	/* With no utime/stime recorded at all, treat all run time as user time. */
	if (total)
		utime = scale_utime(utime, rtime, total);
	else
		utime = rtime;

	/* Keep both reported values monotonic. */
	p->prev_utime = max(p->prev_utime, utime);
	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);

	*ut = p->prev_utime;
	*st = p->prev_stime;
}
3182
3183
3184
3185
/*
 * thread_group_times - report the user and system time of @p's thread group,
 * scaled to its runtime
 *
 * Same scheme as task_times(), applied to the totals returned by
 * thread_group_cputime(); the monotonic values live in the signal_struct.
 */
void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct signal_struct *sig = p->signal;
	struct task_cputime cputime;
	cputime_t rtime, utime, total;

	thread_group_cputime(p, &cputime);

	total = cputime.utime + cputime.stime;
	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);

	/* With no utime/stime recorded at all, treat all run time as user time. */
	if (total)
		utime = scale_utime(cputime.utime, rtime, total);
	else
		utime = rtime;

	/* Keep both reported values monotonic. */
	sig->prev_utime = max(sig->prev_utime, utime);
	sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);

	*ut = sig->prev_utime;
	*st = sig->prev_stime;
}
3208#endif
3209
3210
3211
3212
3213
/*
 * scheduler_tick - per-tick scheduler work for the CPU it runs on
 *
 * Under rq->lock: updates the runqueue clock and cpu load, then calls the
 * task_tick() hook of the running task's scheduling class.  Afterwards runs
 * the perf tick and, on SMP, triggers load balancing.
 */
void scheduler_tick(void)
{
	int cpu = smp_processor_id();
	struct rq *rq = cpu_rq(cpu);
	struct task_struct *curr = rq->curr;

	sched_clock_tick();

	raw_spin_lock(&rq->lock);
	update_rq_clock(rq);
	update_cpu_load_active(rq);
	curr->sched_class->task_tick(rq, curr, 0);
	raw_spin_unlock(&rq->lock);

	perf_event_task_tick();

#ifdef CONFIG_SMP
	/* Record whether the CPU is idle before triggering load balancing. */
	rq->idle_balance = idle_cpu(cpu);
	trigger_load_balance(rq, cpu);
#endif
}
3235
3236notrace unsigned long get_parent_ip(unsigned long addr)
3237{
3238 if (in_lock_functions(addr)) {
3239 addr = CALLER_ADDR2;
3240 if (in_lock_functions(addr))
3241 addr = CALLER_ADDR3;
3242 }
3243 return addr;
3244}
3245
3246#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
3247 defined(CONFIG_PREEMPT_TRACER))
3248
/*
 * add_preempt_count - add @val to the preempt count, with debug checks and
 * preempt-off tracing
 */
void __kprobes add_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
	/* A negative preempt count means an underflow already happened. */
	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
		return;
#endif
	preempt_count() += val;
#ifdef CONFIG_DEBUG_PREEMPT
	/* Warn when the preempt-disable depth gets within 10 of its maximum. */
	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
				PREEMPT_MASK - 10);
#endif
	/* The count was zero before this call: preemption has just been turned off. */
	if (preempt_count() == val)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
3270
/*
 * sub_preempt_count - subtract @val from the preempt count, with debug
 * checks and preempt-on tracing
 */
void __kprobes sub_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
	/* Refuse to subtract more than the current count (underflow). */
	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
		return;
	/*
	 * A value below PREEMPT_MASK must not be subtracted while the
	 * PREEMPT_MASK part of the count is already zero.
	 */
	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
			!(preempt_count() & PREEMPT_MASK)))
		return;
#endif

	/* The count drops to zero with this call: preemption is turned back on. */
	if (preempt_count() == val)
		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
	preempt_count() -= val;
}
EXPORT_SYMBOL(sub_preempt_count);
3292
3293#endif
3294
3295
3296
3297
/*
 * Report a "scheduling while atomic" bug for @prev: print the task name,
 * pid and preempt count, dump held locks, modules and the stack, and taint
 * the kernel.  Stays silent while an oops is in progress.
 */
static noinline void __schedule_bug(struct task_struct *prev)
{
	if (oops_in_progress)
		return;

	printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
		prev->comm, prev->pid, preempt_count());

	debug_show_held_locks(prev);
	print_modules();
	if (irqs_disabled())
		print_irqtrace_events(prev);
	dump_stack();
	add_taint(TAINT_WARN);
}
3313
3314
3315
3316
/*
 * Debug checks and statistics run on every entry into __schedule().
 */
static inline void schedule_debug(struct task_struct *prev)
{
	/*
	 * Complain when scheduling from atomic context.  The check is
	 * skipped once @prev has a non-zero exit_state.
	 */
	if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
		__schedule_bug(prev);
	rcu_sleep_check();

	profile_hit(SCHED_PROFILING, __builtin_return_address(0));

	schedstat_inc(this_rq(), sched_count);
}
3332
/*
 * Hand @prev back to its scheduling class via the class put_prev_task()
 * hook.  The runqueue clock is refreshed first when @prev is still queued
 * or when rq->skip_clock_update is negative.
 */
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
	if (prev->on_rq || rq->skip_clock_update < 0)
		update_rq_clock(rq);
	prev->sched_class->put_prev_task(rq, prev);
}
3339
3340
3341
3342
/*
 * pick_next_task - choose the task to run next on @rq
 *
 * Walks the scheduling classes and returns the first task one of them
 * offers.  Hits BUG() if no class returns a task.
 */
static inline struct task_struct *
pick_next_task(struct rq *rq)
{
	const struct sched_class *class;
	struct task_struct *p;

	/*
	 * Fast path: when every runnable task is counted in the cfs
	 * runqueue, ask the fair class directly.
	 */
	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
		p = fair_sched_class.pick_next_task(rq);
		if (likely(p))
			return p;
	}

	for_each_class(class) {
		p = class->pick_next_task(rq);
		if (p)
			return p;
	}

	/* NOTE(review): presumably the idle class always returns a task - confirm. */
	BUG();
}
3367
3368
3369
3370
/*
 * __schedule - the main scheduler function
 *
 * Picks the next task for this CPU and switches to it.  Runs with
 * preemption disabled and holds rq->lock from raw_spin_lock_irq() until the
 * switch completes (or until the explicit unlock when no switch is needed).
 * Loops for as long as need_resched() is set on exit.
 */
static void __sched __schedule(void)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;

need_resched:
	preempt_disable();
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	rcu_note_context_switch(cpu);
	prev = rq->curr;

	schedule_debug(prev);

	if (sched_feat(HRTICK))
		hrtick_clear(rq);

	raw_spin_lock_irq(&rq->lock);

	/* Count an involuntary switch unless the sleep path below is taken. */
	switch_count = &prev->nivcsw;
	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
		if (unlikely(signal_pending_state(prev->state, prev))) {
			/* A pending signal cancels the sleep. */
			prev->state = TASK_RUNNING;
		} else {
			deactivate_task(rq, prev, DEQUEUE_SLEEP);
			prev->on_rq = 0;

			/*
			 * A workqueue worker is going to sleep: ask the
			 * workqueue code whether another worker should be
			 * woken on this CPU, and wake it if so.
			 */
			if (prev->flags & PF_WQ_WORKER) {
				struct task_struct *to_wakeup;

				to_wakeup = wq_worker_sleeping(prev, cpu);
				if (to_wakeup)
					try_to_wake_up_local(to_wakeup);
			}
		}
		/* prev was not in the running state: count a voluntary switch. */
		switch_count = &prev->nvcsw;
	}

	pre_schedule(rq, prev);

	/* Nothing runnable here: try to pull work from elsewhere. */
	if (unlikely(!rq->nr_running))
		idle_balance(cpu, rq);

	put_prev_task(rq, prev);
	next = pick_next_task(rq);
	clear_tsk_need_resched(prev);
	rq->skip_clock_update = 0;

	if (likely(prev != next)) {
		rq->nr_switches++;
		rq->curr = next;
		++*switch_count;

		context_switch(rq, prev, next); /* unlocks the rq */
		/*
		 * cpu and rq are looked up again after the switch.
		 * NOTE(review): presumably because the values cached before
		 * the switch may be stale once this task runs again - confirm.
		 */
		cpu = smp_processor_id();
		rq = cpu_rq(cpu);
	} else
		raw_spin_unlock_irq(&rq->lock);

	post_schedule(rq);

	sched_preempt_enable_no_resched();
	if (need_resched())
		goto need_resched;
}
3449
3450static inline void sched_submit_work(struct task_struct *tsk)
3451{
3452 if (!tsk->state || tsk_is_pi_blocked(tsk))
3453 return;
3454
3455
3456
3457
3458 if (blk_needs_flush_plug(tsk))
3459 blk_schedule_flush_plug(tsk);
3460}
3461
/*
 * schedule - public scheduler entry point for the current task.
 *
 * Lets sched_submit_work() run for current, then enters __schedule().
 */
asmlinkage void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	__schedule();
}
EXPORT_SYMBOL(schedule);
3470
3471
3472
3473
3474
3475
/*
 * schedule_preempt_disabled - schedule() for a caller running with
 * preemption disabled.
 *
 * Drops one preempt-count level via sched_preempt_enable_no_resched(),
 * calls schedule(), and disables preemption again before returning.
 */
void __sched schedule_preempt_disabled(void)
{
	sched_preempt_enable_no_resched();
	schedule();
	preempt_disable();
}
3482
3483#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
3484
/*
 * owner_running - is @owner still the holder of @lock and on a CPU?
 * @lock:  mutex being spun on
 * @owner: task that was observed as the lock owner
 *
 * Returns false as soon as lock->owner differs from @owner; otherwise
 * returns owner->on_cpu.
 */
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
	if (lock->owner != owner)
		return false;

	/*
	 * Compiler barrier: keeps the compiler from moving the owner->on_cpu
	 * load ahead of the lock->owner comparison above.
	 * NOTE(review): dereferencing @owner relies on the caller keeping the
	 * task_struct alive (mutex_spin_on_owner() holds rcu_read_lock()
	 * around this call) - confirm against the task freeing path.
	 */
	barrier();

	return owner->on_cpu;
}
3500
3501
3502
3503
3504
/*
 * mutex_spin_on_owner - busy-wait while @owner holds @lock and is on a CPU.
 * @lock:  mutex being acquired
 * @owner: task observed as the current owner
 *
 * Returns 0 at once when the OWNER_SPIN scheduler feature is disabled.
 * Otherwise spins until owner_running() fails or this CPU needs to
 * reschedule, and then returns non-zero only if lock->owner is NULL at that
 * point.
 */
int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
	if (!sched_feat(OWNER_SPIN))
		return 0;

	rcu_read_lock();
	while (owner_running(lock, owner)) {
		/* Stop spinning as soon as something else wants this CPU. */
		if (need_resched())
			break;

		arch_mutex_cpu_relax();
	}
	rcu_read_unlock();

	/*
	 * The loop also ends when the lock merely changed owner; only a NULL
	 * owner is reported as success to the caller.
	 */
	return lock->owner == NULL;
}
3526#endif
3527
3528#ifdef CONFIG_PREEMPT
3529
3530
3531
3532
3533
/*
 * preempt_schedule - reschedule on behalf of kernel preemption.
 *
 * Returns without scheduling when the preempt count is non-zero or
 * interrupts are disabled. Otherwise runs __schedule() with PREEMPT_ACTIVE
 * added to the preempt count, repeating while need_resched() stays true.
 */
asmlinkage void __sched notrace preempt_schedule(void)
{
	struct thread_info *ti = current_thread_info();

	/* Preemption is not permitted in this context; keep running. */
	if (likely(ti->preempt_count || irqs_disabled()))
		return;

	do {
		add_preempt_count_notrace(PREEMPT_ACTIVE);
		__schedule();
		sub_preempt_count_notrace(PREEMPT_ACTIVE);

		/*
		 * Compiler barrier so that need_resched() is evaluated after
		 * the preempt-count update above.
		 */
		barrier();
	} while (need_resched());
}
EXPORT_SYMBOL(preempt_schedule);
3558
3559
3560
3561
3562
3563
3564
/*
 * preempt_schedule_irq - preemption entry point for callers that arrive
 * with interrupts disabled.
 *
 * Must be entered with a zero preempt count and interrupts off (BUG_ON
 * otherwise). Interrupts are enabled only around __schedule() and are
 * disabled again before returning.
 */
asmlinkage void __sched preempt_schedule_irq(void)
{
	struct thread_info *ti = current_thread_info();

	/* Catch callers that violate the entry contract. */
	BUG_ON(ti->preempt_count || !irqs_disabled());

	do {
		add_preempt_count(PREEMPT_ACTIVE);
		local_irq_enable();
		__schedule();
		local_irq_disable();
		sub_preempt_count(PREEMPT_ACTIVE);

		/*
		 * Compiler barrier so that need_resched() is evaluated after
		 * the preempt-count update above.
		 */
		barrier();
	} while (need_resched());
}
3586
3587#endif
3588
/*
 * default_wake_function - wait-queue callback that wakes the waiting task.
 * @curr:       wait-queue entry; curr->private is passed as the task to wake
 * @mode:       task-state mask forwarded to try_to_wake_up()
 * @wake_flags: wake flags forwarded to try_to_wake_up()
 * @key:        unused here
 *
 * Returns the result of try_to_wake_up().
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
			  void *key)
{
	return try_to_wake_up(curr->private, mode, wake_flags);
}
EXPORT_SYMBOL(default_wake_function);
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
/*
 * __wake_up_common - walk a wait queue and invoke each waiter's callback.
 * @q:            wait queue head
 * @mode:         passed through to every callback
 * @nr_exclusive: number of exclusive waiters to wake before stopping
 * @wake_flags:   passed through to every callback
 * @key:          passed through to every callback
 *
 * The walk ends once @nr_exclusive entries flagged WQ_FLAG_EXCLUSIVE have had
 * their callback return non-zero. With @nr_exclusive == 0 the pre-decrement
 * goes negative and never tests as zero, so the whole list is visited.
 * The callers in this file invoke it with the queue lock held.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_t *curr, *next;

	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
		/*
		 * Sample the flags before running the callback.
		 * NOTE(review): presumably because the callback may unlink
		 * or invalidate @curr (hence also the _safe iterator) -
		 * confirm against the wake functions in use.
		 */
		unsigned flags = curr->flags;

		if (curr->func(curr, mode, wake_flags, key) &&
				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
/*
 * __wake_up - wake up waiters on a wait queue.
 * @q:            wait queue head
 * @mode:         task-state mask passed to the wake callbacks
 * @nr_exclusive: how many exclusive waiters to wake (0 means no limit)
 * @key:          opaque value passed to the wake callbacks
 *
 * Takes q->lock with interrupts saved/disabled around the walk and uses
 * wake_flags == 0.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, void *key)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_common(q, mode, nr_exclusive, 0, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(__wake_up);
3639
3640
3641
3642
/*
 * __wake_up_locked - __wake_up() variant that takes no lock itself.
 * @q:    wait queue head
 * @mode: task-state mask passed to the wake callbacks
 * @nr:   how many exclusive waiters to wake
 *
 * No locking is done here; going by the name, the caller is expected to
 * hold q->lock already. A NULL key is passed to the callbacks.
 */
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
{
	__wake_up_common(q, mode, nr, 0, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);
3648
/*
 * __wake_up_locked_key - lock-free-entry wake-up of one exclusive waiter
 * with a caller-supplied @key.
 *
 * No locking is done here; going by the name, the caller is expected to
 * hold q->lock already. nr_exclusive is fixed at 1.
 */
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
{
	__wake_up_common(q, mode, 1, 0, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
3673 int nr_exclusive, void *key)
3674{
3675 unsigned long flags;
3676 int wake_flags = WF_SYNC;
3677
3678 if (unlikely(!q))
3679 return;
3680
3681 if (unlikely(!nr_exclusive))
3682 wake_flags = 0;
3683
3684 spin_lock_irqsave(&q->lock, flags);
3685 __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
3686 spin_unlock_irqrestore(&q->lock, flags);
3687}
3688EXPORT_SYMBOL_GPL(__wake_up_sync_key);
3689
3690
3691
3692
/*
 * __wake_up_sync - __wake_up_sync_key() with a NULL key.
 */
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
{
	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
/*
 * complete - signal one waiter of a completion.
 * @x: completion to signal
 *
 * Under x->wait.lock (interrupts saved/disabled): increments x->done by one
 * and wakes at most one exclusive waiter in TASK_NORMAL state.
 */
void complete(struct completion *x)
{
	unsigned long flags;

	spin_lock_irqsave(&x->wait.lock, flags);
	x->done++;
	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
	spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
/*
 * complete_all - signal every waiter of a completion.
 * @x: completion to signal
 *
 * Under x->wait.lock (interrupts saved/disabled): adds UINT_MAX/2 to
 * x->done, so that many waiters can each consume one count, and wakes all
 * waiters (nr_exclusive == 0 means no limit in __wake_up_common()).
 */
void complete_all(struct completion *x)
{
	unsigned long flags;

	spin_lock_irqsave(&x->wait.lock, flags);
	x->done += UINT_MAX/2;
	__wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
	spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
3741
/*
 * do_wait_for_common - sleep until @x is completed, a signal is pending for
 * @state, or @timeout expires.
 * @x:       completion to wait on
 * @timeout: maximum time to sleep, as taken by schedule_timeout()
 * @state:   task state to sleep in
 *
 * Entered and left with x->wait.lock held (wait_for_common() takes it); the
 * lock is dropped only around schedule_timeout().
 *
 * Returns -ERESTARTSYS if interrupted by a signal, 0 if the timeout ran out
 * without completion, and otherwise the remaining timeout (at least 1) after
 * consuming one count from x->done.
 */
static inline long __sched
do_wait_for_common(struct completion *x, long timeout, int state)
{
	if (!x->done) {
		DECLARE_WAITQUEUE(wait, current);

		/* Queue as an exclusive waiter at the tail of the list. */
		__add_wait_queue_tail_exclusive(&x->wait, &wait);
		do {
			if (signal_pending_state(state, current)) {
				timeout = -ERESTARTSYS;
				break;
			}
			/* Set the sleep state before releasing the lock. */
			__set_current_state(state);
			spin_unlock_irq(&x->wait.lock);
			timeout = schedule_timeout(timeout);
			spin_lock_irq(&x->wait.lock);
		} while (!x->done && timeout);
		__remove_wait_queue(&x->wait, &wait);
		/* Not completed: report the signal error or the expired timeout. */
		if (!x->done)
			return timeout;
	}
	x->done--;
	/* Never return 0 on success, even if the timeout just hit zero. */
	return timeout ?: 1;
}
3766
/*
 * wait_for_common - locked wrapper around do_wait_for_common().
 * @x:       completion to wait on
 * @timeout: maximum time to sleep
 * @state:   task state to sleep in
 *
 * May sleep. Takes x->wait.lock with interrupts disabled for the call and
 * returns whatever do_wait_for_common() returns.
 */
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
	might_sleep();

	spin_lock_irq(&x->wait.lock);
	timeout = do_wait_for_common(x, timeout, state);
	spin_unlock_irq(&x->wait.lock);
	return timeout;
}
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
/*
 * wait_for_completion - wait for @x in TASK_UNINTERRUPTIBLE state with
 * MAX_SCHEDULE_TIMEOUT as the timeout.
 * @x: completion to wait on
 */
void __sched wait_for_completion(struct completion *x)
{
	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
/*
 * wait_for_completion_timeout - uninterruptible wait with a timeout.
 * @x:       completion to wait on
 * @timeout: maximum time to sleep, as taken by schedule_timeout()
 *
 * Returns 0 if the wait timed out, otherwise the remaining timeout
 * (at least 1).
 */
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
{
	return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_timeout);
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822int __sched wait_for_completion_interruptible(struct completion *x)
3823{
3824 long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
3825 if (t == -ERESTARTSYS)
3826 return t;
3827 return 0;
3828}
3829EXPORT_SYMBOL(wait_for_completion_interruptible);
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
/*
 * wait_for_completion_interruptible_timeout - interruptible wait with a
 * timeout.
 * @x:       completion to wait on
 * @timeout: maximum time to sleep, as taken by schedule_timeout()
 *
 * Returns -ERESTARTSYS if interrupted by a signal, 0 if the wait timed out,
 * otherwise the remaining timeout (at least 1).
 */
long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
					  unsigned long timeout)
{
	return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859int __sched wait_for_completion_killable(struct completion *x)
3860{
3861 long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
3862 if (t == -ERESTARTSYS)
3863 return t;
3864 return 0;
3865}
3866EXPORT_SYMBOL(wait_for_completion_killable);
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
/*
 * wait_for_completion_killable_timeout - TASK_KILLABLE wait with a timeout.
 * @x:       completion to wait on
 * @timeout: maximum time to sleep, as taken by schedule_timeout()
 *
 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, otherwise
 * the remaining timeout (at least 1).
 */
long __sched
wait_for_completion_killable_timeout(struct completion *x,
				     unsigned long timeout)
{
	return wait_for_common(x, timeout, TASK_KILLABLE);
}
EXPORT_SYMBOL(wait_for_completion_killable_timeout);
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900bool try_wait_for_completion(struct completion *x)
3901{
3902 unsigned long flags;
3903 int ret = 1;
3904
3905 spin_lock_irqsave(&x->wait.lock, flags);
3906 if (!x->done)
3907 ret = 0;
3908 else
3909 x->done--;
3910 spin_unlock_irqrestore(&x->wait.lock, flags);
3911 return ret;
3912}
3913EXPORT_SYMBOL(try_wait_for_completion);
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923bool completion_done(struct completion *x)
3924{
3925 unsigned long flags;
3926 int ret = 1;
3927
3928 spin_lock_irqsave(&x->wait.lock, flags);
3929 if (!x->done)
3930 ret = 0;
3931 spin_unlock_irqrestore(&x->wait.lock, flags);
3932 return ret;
3933}
3934EXPORT_SYMBOL(completion_done);
3935
/*
 * sleep_on_common - shared body of the sleep_on() family.
 * @q:       wait queue to sleep on
 * @state:   task state to sleep in
 * @timeout: maximum time to sleep, as taken by schedule_timeout()
 *
 * Returns the value handed back by schedule_timeout().
 *
 * NOTE(review): the task state is set before the entry is on the queue and
 * no wake-up condition is checked here, so a wake-up issued before
 * __add_wait_queue() is not seen by this function - callers must tolerate
 * that.
 */
static long __sched
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
{
	unsigned long flags;
	wait_queue_t wait;

	init_waitqueue_entry(&wait, current);

	__set_current_state(state);

	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue(q, &wait);
	/*
	 * Plain unlock: the interrupt state saved in @flags is restored only
	 * by the final spin_unlock_irqrestore() below.
	 */
	spin_unlock(&q->lock);
	timeout = schedule_timeout(timeout);
	spin_lock_irq(&q->lock);
	__remove_wait_queue(q, &wait);
	spin_unlock_irqrestore(&q->lock, flags);

	return timeout;
}
3956
/*
 * interruptible_sleep_on - sleep on @q in TASK_INTERRUPTIBLE state with
 * MAX_SCHEDULE_TIMEOUT as the timeout.
 */
void __sched interruptible_sleep_on(wait_queue_head_t *q)
{
	sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
EXPORT_SYMBOL(interruptible_sleep_on);
3962
/*
 * interruptible_sleep_on_timeout - sleep on @q in TASK_INTERRUPTIBLE state
 * for at most @timeout.
 *
 * Returns the value handed back by schedule_timeout().
 */
long __sched
interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
	return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout);
}
EXPORT_SYMBOL(interruptible_sleep_on_timeout);
3969
/*
 * sleep_on - sleep on @q in TASK_UNINTERRUPTIBLE state with
 * MAX_SCHEDULE_TIMEOUT as the timeout.
 */
void __sched sleep_on(wait_queue_head_t *q)
{
	sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
EXPORT_SYMBOL(sleep_on);
3975
/*
 * sleep_on_timeout - sleep on @q in TASK_UNINTERRUPTIBLE state for at most
 * @timeout.
 *
 * Returns the value handed back by schedule_timeout().
 */
long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
	return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout);
}
EXPORT_SYMBOL(sleep_on_timeout);
3981
3982#ifdef CONFIG_RT_MUTEXES
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
/*
 * rt_mutex_setprio - set the effective priority of a task.
 * @p:    task to modify
 * @prio: new value for p->prio; must lie in [0, MAX_PRIO] (BUG_ON otherwise)
 *
 * Switches @p to rt_sched_class or fair_sched_class depending on
 * rt_prio(@prio) and updates p->prio. A queued task is dequeued and
 * re-enqueued around the change. A running task goes through the class
 * put_prev_task()/set_curr_task() hooks. The runqueue's idle task is never
 * modified.
 */
void rt_mutex_setprio(struct task_struct *p, int prio)
{
	int oldprio, on_rq, running;
	struct rq *rq;
	const struct sched_class *prev_class;

	BUG_ON(prio < 0 || prio > MAX_PRIO);

	rq = __task_rq_lock(p);

	/*
	 * Refuse to touch the idle task. The WARN_ONs flag the unexpected
	 * cases of the idle task not being rq->curr or being blocked on a
	 * PI lock.
	 */
	if (unlikely(p == rq->idle)) {
		WARN_ON(p != rq->curr);
		WARN_ON(p->pi_blocked_on);
		goto out_unlock;
	}

	trace_sched_pi_setprio(p, prio);
	oldprio = p->prio;
	prev_class = p->sched_class;
	on_rq = p->on_rq;
	running = task_current(rq, p);
	/* Take the task out of its class before changing prio and class. */
	if (on_rq)
		dequeue_task(rq, p, 0);
	if (running)
		p->sched_class->put_prev_task(rq, p);

	if (rt_prio(prio))
		p->sched_class = &rt_sched_class;
	else
		p->sched_class = &fair_sched_class;

	p->prio = prio;

	/* Put it back using the (possibly new) class. */
	if (running)
		p->sched_class->set_curr_task(rq);
	/* ENQUEUE_HEAD is used when the prio value went up (oldprio < prio). */
	if (on_rq)
		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);

	check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
	__task_rq_unlock(rq);
}
4048#endif
/*
 * set_user_nice - change the nice value of a task.
 * @p:    task to modify
 * @nice: new nice value
 *
 * Requests that equal the current nice value or fall outside [-20, 19] are
 * silently ignored.
 */
void set_user_nice(struct task_struct *p, long nice)
{
	int old_prio, delta, on_rq;
	unsigned long flags;
	struct rq *rq;

	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
		return;

	rq = task_rq_lock(p, &flags);

	/*
	 * For a task with an RT policy only static_prio is recorded;
	 * p->prio, the load weight and the queueing are left untouched.
	 */
	if (task_has_rt_policy(p)) {
		p->static_prio = NICE_TO_PRIO(nice);
		goto out_unlock;
	}
	on_rq = p->on_rq;
	if (on_rq)
		dequeue_task(rq, p, 0);

	p->static_prio = NICE_TO_PRIO(nice);
	set_load_weight(p);
	old_prio = p->prio;
	p->prio = effective_prio(p);
	delta = p->prio - old_prio;

	if (on_rq) {
		enqueue_task(rq, p, 0);
		/*
		 * Reschedule rq->curr if the task's prio value dropped, or
		 * if it rose while the task itself is running.
		 */
		if (delta < 0 || (delta > 0 && task_running(rq, p)))
			resched_task(rq->curr);
	}
out_unlock:
	task_rq_unlock(rq, p, &flags);
}
EXPORT_SYMBOL(set_user_nice);
4095
4096
4097
4098
4099
4100
4101int can_nice(const struct task_struct *p, const int nice)
4102{
4103
4104 int nice_rlim = 20 - nice;
4105
4106 return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
4107 capable(CAP_SYS_NICE));
4108}
4109
4110#ifdef __ARCH_WANT_SYS_NICE
4111
4112
4113
4114
4115
4116
4117
4118
4119SYSCALL_DEFINE1(nice, int, increment)
4120{
4121 long nice, retval;
4122
4123
4124
4125
4126
4127
4128 if (increment < -40)
4129 increment = -40;
4130 if (increment > 40)
4131 increment = 40;
4132
4133 nice = TASK_NICE(current) + increment;
4134 if (nice < -20)
4135 nice = -20;
4136 if (nice > 19)
4137 nice = 19;
4138
4139 if (increment < 0 && !can_nice(current, nice))
4140 return -EPERM;
4141
4142 retval = security_task_setnice(current, nice);
4143 if (retval)
4144 return retval;
4145
4146 set_user_nice(current, nice);
4147 return 0;
4148}
4149
4150#endif
4151
4152
4153
4154
4155
4156
4157
4158
4159
/*
 * task_prio - return a task's priority relative to MAX_RT_PRIO.
 * @p: task in question
 *
 * Returns p->prio - MAX_RT_PRIO, so values below zero correspond to prio
 * values under MAX_RT_PRIO.
 */
int task_prio(const struct task_struct *p)
{
	return p->prio - MAX_RT_PRIO;
}
4164
4165
4166
4167
4168
/*
 * task_nice - return the nice value of a task, as computed by TASK_NICE().
 * @p: task in question
 */
int task_nice(const struct task_struct *p)
{
	return TASK_NICE(p);
}
EXPORT_SYMBOL(task_nice);
4174
4175
4176
4177
4178
4179int idle_cpu(int cpu)
4180{
4181 struct rq *rq = cpu_rq(cpu);
4182
4183 if (rq->curr != rq->idle)
4184 return 0;
4185
4186 if (rq->nr_running)
4187 return 0;
4188
4189#ifdef CONFIG_SMP
4190 if (!llist_empty(&rq->wake_list))
4191 return 0;
4192#endif
4193
4194 return 1;
4195}
4196
4197
4198
4199
4200
/*
 * idle_task - return the idle task recorded in the runqueue of @cpu.
 * @cpu: processor in question
 */
struct task_struct *idle_task(int cpu)
{
	return cpu_rq(cpu)->idle;
}
4205
4206
4207
4208
4209
4210static struct task_struct *find_process_by_pid(pid_t pid)
4211{
4212 return pid ? find_task_by_vpid(pid) : current;
4213}
4214
4215
4216static void
4217__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
4218{
4219 p->policy = policy;
4220 p->rt_priority = prio;
4221 p->normal_prio = normal_prio(p);
4222
4223 p->prio = rt_mutex_getprio(p);
4224 if (rt_prio(p->prio))
4225 p->sched_class = &rt_sched_class;
4226 else
4227 p->sched_class = &fair_sched_class;
4228 set_load_weight(p);
4229}
4230
4231
4232
4233
4234static bool check_same_owner(struct task_struct *p)
4235{
4236 const struct cred *cred = current_cred(), *pcred;
4237 bool match;
4238
4239 rcu_read_lock();
4240 pcred = __task_cred(p);
4241 match = (uid_eq(cred->euid, pcred->euid) ||
4242 uid_eq(cred->euid, pcred->uid));
4243 rcu_read_unlock();
4244 return match;
4245}
4246
/*
 * __sched_setscheduler - change the scheduling policy and RT priority of @p.
 * @p:      task to modify
 * @policy: new policy, optionally OR-ed with SCHED_RESET_ON_FORK; a negative
 *          value keeps the task's current policy and reset-on-fork setting
 * @param:  carries the requested sched_priority
 * @user:   true to enforce the permission and security-hook checks
 *
 * Returns 0 on success (including "nothing to change"), -EINVAL for an
 * unknown policy, an out-of-range or mismatching priority, or when @p is
 * the runqueue's stop task, -EPERM when a permission check fails, or the
 * error reported by security_task_setscheduler().
 *
 * Must not be called from interrupt context (BUG_ON below).
 */
static int __sched_setscheduler(struct task_struct *p, int policy,
				const struct sched_param *param, bool user)
{
	int retval, oldprio, oldpolicy = -1, on_rq, running;
	unsigned long flags;
	const struct sched_class *prev_class;
	struct rq *rq;
	int reset_on_fork;

	/* Not allowed from interrupt context. */
	BUG_ON(in_interrupt());
recheck:
	/* A negative policy means "keep the current policy, change param". */
	if (policy < 0) {
		reset_on_fork = p->sched_reset_on_fork;
		policy = oldpolicy = p->policy;
	} else {
		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
		policy &= ~SCHED_RESET_ON_FORK;

		if (policy != SCHED_FIFO && policy != SCHED_RR &&
				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
				policy != SCHED_IDLE)
			return -EINVAL;
	}

	/*
	 * Priority range check: tasks with an mm are limited to
	 * MAX_USER_RT_PRIO-1, tasks without one to MAX_RT_PRIO-1. RT policies
	 * require a non-zero priority, all other policies require 0.
	 */
	if (param->sched_priority < 0 ||
	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
		return -EINVAL;
	if (rt_policy(policy) != (param->sched_priority != 0))
		return -EINVAL;

	/* Permission checks for callers without CAP_SYS_NICE. */
	if (user && !capable(CAP_SYS_NICE)) {
		if (rt_policy(policy)) {
			unsigned long rlim_rtprio =
					task_rlimit(p, RLIMIT_RTPRIO);

			/* Changing policy needs a non-zero RLIMIT_RTPRIO. */
			if (policy != p->policy && !rlim_rtprio)
				return -EPERM;

			/* Raising the priority is bounded by RLIMIT_RTPRIO. */
			if (param->sched_priority > p->rt_priority &&
			    param->sched_priority > rlim_rtprio)
				return -EPERM;
		}

		/*
		 * Leaving SCHED_IDLE is only allowed if can_nice() permits
		 * the task's current nice value.
		 */
		if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
			if (!can_nice(p, TASK_NICE(p)))
				return -EPERM;
		}

		/* The caller must own the target task. */
		if (!check_same_owner(p))
			return -EPERM;

		/* The reset-on-fork flag may be set but not cleared. */
		if (p->sched_reset_on_fork && !reset_on_fork)
			return -EPERM;
	}

	if (user) {
		retval = security_task_setscheduler(p);
		if (retval)
			return retval;
	}

	/* Everything from here on runs under the task's runqueue lock. */
	rq = task_rq_lock(p, &flags);

	/* The runqueue's stop task cannot be changed. */
	if (p == rq->stop) {
		task_rq_unlock(rq, p, &flags);
		return -EINVAL;
	}

	/*
	 * Nothing to do: same policy and, for RT policies, same priority.
	 * Note that a change of only the reset-on-fork flag is not applied
	 * in this case.
	 */
	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
			param->sched_priority == p->rt_priority))) {
		task_rq_unlock(rq, p, &flags);
		return 0;
	}

#ifdef CONFIG_RT_GROUP_SCHED
	if (user) {
		/*
		 * Refuse an RT policy for a task whose group has zero RT
		 * runtime, unless that group is an autogroup.
		 */
		if (rt_bandwidth_enabled() && rt_policy(policy) &&
				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
				!task_group_is_autogroup(task_group(p))) {
			task_rq_unlock(rq, p, &flags);
			return -EPERM;
		}
	}
#endif

	/*
	 * The policy was sampled before the lock was taken (policy < 0 case);
	 * if it changed in the meantime, redo all checks.
	 */
	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
		policy = oldpolicy = -1;
		task_rq_unlock(rq, p, &flags);
		goto recheck;
	}
	on_rq = p->on_rq;
	running = task_current(rq, p);
	/* Take the task out of its class before changing it. */
	if (on_rq)
		dequeue_task(rq, p, 0);
	if (running)
		p->sched_class->put_prev_task(rq, p);

	p->sched_reset_on_fork = reset_on_fork;

	oldprio = p->prio;
	prev_class = p->sched_class;
	__setscheduler(rq, p, policy, param->sched_priority);

	/* Put it back using the (possibly new) class. */
	if (running)
		p->sched_class->set_curr_task(rq);
	if (on_rq)
		enqueue_task(rq, p, 0);

	check_class_changed(rq, p, prev_class, oldprio);
	task_rq_unlock(rq, p, &flags);

	/* Called after the runqueue lock has been dropped. */
	rt_mutex_adjust_pi(p);

	return 0;
}
4399
4400
4401
4402
4403
4404
4405
4406
4407
/*
 * sched_setscheduler - change a task's scheduling policy and/or RT priority
 * with the permission checks enabled.
 * @p:      task to modify
 * @policy: new policy
 * @param:  carries the new sched_priority
 *
 * Returns the result of __sched_setscheduler() called with user == true.
 */
int sched_setscheduler(struct task_struct *p, int policy,
		       const struct sched_param *param)
{
	return __sched_setscheduler(p, policy, param, true);
}
EXPORT_SYMBOL_GPL(sched_setscheduler);
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
/*
 * sched_setscheduler_nocheck - like sched_setscheduler() but with the
 * permission and security-hook checks skipped (user == false).
 * @p:      task to modify
 * @policy: new policy
 * @param:  carries the new sched_priority
 *
 * Returns the result of __sched_setscheduler().
 */
int sched_setscheduler_nocheck(struct task_struct *p, int policy,
			       const struct sched_param *param)
{
	return __sched_setscheduler(p, policy, param, false);
}
4431
4432static int
4433do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4434{
4435 struct sched_param lparam;
4436 struct task_struct *p;
4437 int retval;
4438
4439 if (!param || pid < 0)
4440 return -EINVAL;
4441 if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
4442 return -EFAULT;
4443
4444 rcu_read_lock();
4445 retval = -ESRCH;
4446 p = find_process_by_pid(pid);
4447 if (p != NULL)
4448 retval = sched_setscheduler(p, policy, &lparam);
4449 rcu_read_unlock();
4450
4451 return retval;
4452}
4453
4454
4455
4456
4457
4458
4459
/*
 * sys_sched_setscheduler - set the scheduling policy and parameters of a
 * task.
 * @pid:    target pid, 0 for the caller
 * @policy: new policy
 * @param:  user pointer to the new parameters
 *
 * Returns -EINVAL for a negative @policy, otherwise the result of
 * do_sched_setscheduler().
 */
SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
		struct sched_param __user *, param)
{
	/* Negative values are reserved for the internal "keep policy" case. */
	if (policy < 0)
		return -EINVAL;

	return do_sched_setscheduler(pid, policy, param);
}
4469
4470
4471
4472
4473
4474
/*
 * sys_sched_setparam - set the scheduling parameters of a task while
 * keeping its policy.
 * @pid:   target pid, 0 for the caller
 * @param: user pointer to the new parameters
 *
 * Passes -1 as the policy, which __sched_setscheduler() treats as "keep
 * the current policy".
 */
SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
	return do_sched_setscheduler(pid, -1, param);
}
4479
4480
4481
4482
4483
4484SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
4485{
4486 struct task_struct *p;
4487 int retval;
4488
4489 if (pid < 0)
4490 return -EINVAL;
4491
4492 retval = -ESRCH;
4493 rcu_read_lock();
4494 p = find_process_by_pid(pid);
4495 if (p) {
4496 retval = security_task_getscheduler(p);
4497 if (!retval)
4498 retval = p->policy
4499 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
4500 }
4501 rcu_read_unlock();
4502 return retval;
4503}
4504
4505
4506
4507
4508
4509
4510SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
4511{
4512 struct sched_param lp;
4513 struct task_struct *p;
4514 int retval;
4515
4516 if (!param || pid < 0)
4517 return -EINVAL;
4518
4519 rcu_read_lock();
4520 p = find_process_by_pid(pid);
4521 retval = -ESRCH;
4522 if (!p)
4523 goto out_unlock;
4524
4525 retval = security_task_getscheduler(p);
4526 if (retval)
4527 goto out_unlock;
4528
4529 lp.sched_priority = p->rt_priority;
4530 rcu_read_unlock();
4531
4532
4533
4534
4535 retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
4536
4537 return retval;
4538
4539out_unlock:
4540 rcu_read_unlock();
4541 return retval;
4542}
4543
/*
 * sched_setaffinity - set the CPU affinity mask of a task.
 * @pid:     target pid, 0 for the caller
 * @in_mask: requested CPU mask
 *
 * The requested mask is intersected with the CPUs the task's cpuset allows
 * before it is applied.
 *
 * Returns 0 on success, -ESRCH if no task matches, -ENOMEM if a temporary
 * mask cannot be allocated, -EPERM if the caller neither owns the task nor
 * has CAP_SYS_NICE in the task's user namespace, the error from
 * security_task_setscheduler(), or the error from set_cpus_allowed_ptr().
 */
long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
{
	cpumask_var_t cpus_allowed, new_mask;
	struct task_struct *p;
	int retval;

	get_online_cpus();
	rcu_read_lock();

	p = find_process_by_pid(pid);
	if (!p) {
		rcu_read_unlock();
		put_online_cpus();
		return -ESRCH;
	}

	/* Pin the task so it stays valid after the RCU section ends. */
	get_task_struct(p);
	rcu_read_unlock();

	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
		retval = -ENOMEM;
		goto out_put_task;
	}
	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
		retval = -ENOMEM;
		goto out_free_cpus_allowed;
	}
	retval = -EPERM;
	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
		goto out_unlock;

	retval = security_task_setscheduler(p);
	if (retval)
		goto out_unlock;

	cpuset_cpus_allowed(p, cpus_allowed);
	cpumask_and(new_mask, in_mask, cpus_allowed);
again:
	retval = set_cpus_allowed_ptr(p, new_mask);

	if (!retval) {
		/*
		 * Re-read the cpuset's allowed CPUs after applying the mask.
		 * If the applied mask is no longer a subset, fall back to the
		 * cpuset's mask and apply again.
		 * NOTE(review): presumably this guards against the cpuset
		 * changing concurrently - confirm.
		 */
		cpuset_cpus_allowed(p, cpus_allowed);
		if (!cpumask_subset(new_mask, cpus_allowed)) {
			cpumask_copy(new_mask, cpus_allowed);
			goto again;
		}
	}
out_unlock:
	free_cpumask_var(new_mask);
out_free_cpus_allowed:
	free_cpumask_var(cpus_allowed);
out_put_task:
	put_task_struct(p);
	put_online_cpus();
	return retval;
}
4606
4607static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
4608 struct cpumask *new_mask)
4609{
4610 if (len < cpumask_size())
4611 cpumask_clear(new_mask);
4612 else if (len > cpumask_size())
4613 len = cpumask_size();
4614
4615 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
4616}
4617
4618
4619
4620
4621
4622
4623
/*
 * sys_sched_setaffinity - set the CPU affinity of a task.
 * @pid:           target pid, 0 for the caller
 * @len:           length in bytes of the mask at @user_mask_ptr
 * @user_mask_ptr: user pointer to the new CPU mask
 *
 * Returns -ENOMEM if the temporary mask cannot be allocated, -EFAULT if the
 * user mask cannot be copied in, otherwise the result of sched_setaffinity().
 */
SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
		unsigned long __user *, user_mask_ptr)
{
	cpumask_var_t new_mask;
	int retval;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
	if (retval == 0)
		retval = sched_setaffinity(pid, new_mask);
	free_cpumask_var(new_mask);
	return retval;
}
4639
/*
 * sched_getaffinity - read the CPU affinity of a task.
 * @pid:  target pid, 0 for the caller
 * @mask: receives the task's allowed CPUs restricted to online CPUs
 *
 * Returns 0 on success, -ESRCH if no task matches, or the error from
 * security_task_getscheduler(). @mask is only written on success.
 */
long sched_getaffinity(pid_t pid, struct cpumask *mask)
{
	struct task_struct *p;
	unsigned long flags;
	int retval;

	get_online_cpus();
	rcu_read_lock();

	retval = -ESRCH;
	p = find_process_by_pid(pid);
	if (!p)
		goto out_unlock;

	retval = security_task_getscheduler(p);
	if (retval)
		goto out_unlock;

	/* p->cpus_allowed is read under the task's pi_lock. */
	raw_spin_lock_irqsave(&p->pi_lock, flags);
	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

out_unlock:
	rcu_read_unlock();
	put_online_cpus();

	return retval;
}
4668
4669
4670
4671
4672
4673
4674
4675SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
4676 unsigned long __user *, user_mask_ptr)
4677{
4678 int ret;
4679 cpumask_var_t mask;
4680
4681 if ((len * BITS_PER_BYTE) < nr_cpu_ids)
4682 return -EINVAL;
4683 if (len & (sizeof(unsigned long)-1))
4684 return -EINVAL;
4685
4686 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
4687 return -ENOMEM;
4688
4689 ret = sched_getaffinity(pid, mask);
4690 if (ret == 0) {
4691 size_t retlen = min_t(size_t, len, cpumask_size());
4692
4693 if (copy_to_user(user_mask_ptr, mask, retlen))
4694 ret = -EFAULT;
4695 else
4696 ret = retlen;
4697 }
4698 free_cpumask_var(mask);
4699
4700 return ret;
4701}
4702
4703
4704
4705
4706
4707
4708
/*
 * sys_sched_yield - yield the current processor to other tasks.
 *
 * Invokes the yield_task() hook of the caller's scheduling class under the
 * runqueue lock and then calls schedule(). Always returns 0.
 */
SYSCALL_DEFINE0(sched_yield)
{
	struct rq *rq = this_rq_lock();

	schedstat_inc(rq, yld_count);
	current->sched_class->yield_task(rq);

	/*
	 * The runqueue lock is released piecewise (annotation, lockdep
	 * release, raw unlock) and preemption is re-enabled without a
	 * reschedule check, since schedule() is called right below.
	 */
	__release(rq->lock);
	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
	do_raw_spin_unlock(&rq->lock);
	sched_preempt_enable_no_resched();

	schedule();

	return 0;
}
4729
/*
 * should_resched - is a voluntary reschedule both wanted and allowed?
 *
 * Returns non-zero when need_resched() is set and PREEMPT_ACTIVE is not set
 * in the preempt count.
 */
static inline int should_resched(void)
{
	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
}
4734
/*
 * __cond_resched - run __schedule() with PREEMPT_ACTIVE set in the preempt
 * count for the duration of the call.
 */
static void __cond_resched(void)
{
	add_preempt_count(PREEMPT_ACTIVE);
	__schedule();
	sub_preempt_count(PREEMPT_ACTIVE);
}
4741
4742int __sched _cond_resched(void)
4743{
4744 if (should_resched()) {
4745 __cond_resched();
4746 return 1;
4747 }
4748 return 0;
4749}
4750EXPORT_SYMBOL(_cond_resched);
4751
4752
4753
4754
4755
4756
4757
4758
4759
/*
 * __cond_resched_lock - drop @lock briefly if a reschedule is due or the
 * lock needs a break.
 * @lock: spinlock held by the caller (asserted via lockdep)
 *
 * When spin_needbreak() is true or a reschedule is pending, the lock is
 * released, the CPU either reschedules or relaxes, and the lock is taken
 * again before returning.
 *
 * Returns 1 if the lock was dropped and re-acquired, 0 otherwise.
 */
int __cond_resched_lock(spinlock_t *lock)
{
	/* Sampled once so the decision is the same before and after unlock. */
	int resched = should_resched();
	int ret = 0;

	lockdep_assert_held(lock);

	if (spin_needbreak(lock) || resched) {
		spin_unlock(lock);
		if (resched)
			__cond_resched();
		else
			cpu_relax();
		ret = 1;
		spin_lock(lock);
	}
	return ret;
}
EXPORT_SYMBOL(__cond_resched_lock);
4779
4780int __sched __cond_resched_softirq(void)
4781{
4782 BUG_ON(!in_softirq());
4783
4784 if (should_resched()) {
4785 local_bh_enable();
4786 __cond_resched();
4787 local_bh_disable();
4788 return 1;
4789 }
4790 return 0;
4791}
4792EXPORT_SYMBOL(__cond_resched_softirq);
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
/*
 * yield - give up the CPU from kernel code.
 *
 * Forces the current task state to TASK_RUNNING and then goes through
 * sys_sched_yield(), so the caller stays runnable.
 */
void __sched yield(void)
{
	set_current_state(TASK_RUNNING);
	sys_sched_yield();
}
EXPORT_SYMBOL(yield);
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
/*
 * yield_to - yield the current processor in favour of another task.
 * @p:       task to yield to
 * @preempt: when true and @p sits on another runqueue, that runqueue's
 *           current task is asked to reschedule
 *
 * Nothing is done unless the caller's scheduling class implements
 * yield_to_task(), both tasks are in the same class, and @p is in state 0
 * and not currently running.
 *
 * Returns true if the yield took place (schedule() is then called before
 * returning), false otherwise.
 */
bool __sched yield_to(struct task_struct *p, bool preempt)
{
	struct task_struct *curr = current;
	struct rq *rq, *p_rq;
	unsigned long flags;
	bool yielded = 0;

	local_irq_save(flags);
	rq = this_rq();

again:
	p_rq = task_rq(p);
	double_rq_lock(rq, p_rq);
	/*
	 * @p may have moved to another runqueue before the locks were
	 * taken; if so, drop them and retry. The body always jumps away, so
	 * this "while" behaves like an "if".
	 */
	while (task_rq(p) != p_rq) {
		double_rq_unlock(rq, p_rq);
		goto again;
	}

	if (!curr->sched_class->yield_to_task)
		goto out;

	if (curr->sched_class != p->sched_class)
		goto out;

	if (task_running(p_rq, p) || p->state)
		goto out;

	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
	if (yielded) {
		schedstat_inc(rq, yld_count);
		/* Make the CPU that hosts @p reschedule as well. */
		if (preempt && rq != p_rq)
			resched_task(p_rq->curr);
	} else {
		/*
		 * No schedule() follows in this case, so clear
		 * skip_clock_update.
		 * NOTE(review): presumably the yield_to_task() hook may
		 * have set it - confirm against the class implementation.
		 */
		rq->skip_clock_update = 0;
	}

out:
	double_rq_unlock(rq, p_rq);
	local_irq_restore(flags);

	if (yielded)
		schedule();

	return yielded;
}
EXPORT_SYMBOL_GPL(yield_to);
4890
4891
4892
4893
4894
/*
 * io_schedule - schedule() while accounted as waiting for I/O.
 *
 * Around the call to schedule() the task is marked in_iowait, the
 * runqueue's nr_iowait counter is raised, and block-I/O delay accounting
 * is started/ended. The task's block plug is flushed before sleeping.
 * nr_iowait is decremented on the runqueue sampled at entry.
 */
void __sched io_schedule(void)
{
	struct rq *rq = raw_rq();

	delayacct_blkio_start();
	atomic_inc(&rq->nr_iowait);
	blk_flush_plug(current);
	current->in_iowait = 1;
	schedule();
	current->in_iowait = 0;
	atomic_dec(&rq->nr_iowait);
	delayacct_blkio_end();
}
EXPORT_SYMBOL(io_schedule);
4909
/*
 * io_schedule_timeout - schedule_timeout() while accounted as waiting for
 * I/O.
 * @timeout: maximum time to sleep, as taken by schedule_timeout()
 *
 * Performs the same iowait accounting and plug flushing as io_schedule().
 *
 * Returns the value handed back by schedule_timeout().
 */
long __sched io_schedule_timeout(long timeout)
{
	struct rq *rq = raw_rq();
	long ret;

	delayacct_blkio_start();
	atomic_inc(&rq->nr_iowait);
	blk_flush_plug(current);
	current->in_iowait = 1;
	ret = schedule_timeout(timeout);
	current->in_iowait = 0;
	atomic_dec(&rq->nr_iowait);
	delayacct_blkio_end();
	return ret;
}
4925
4926
4927
4928
4929
4930
4931
4932
4933SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
4934{
4935 int ret = -EINVAL;
4936
4937 switch (policy) {
4938 case SCHED_FIFO:
4939 case SCHED_RR:
4940 ret = MAX_USER_RT_PRIO-1;
4941 break;
4942 case SCHED_NORMAL:
4943 case SCHED_BATCH:
4944 case SCHED_IDLE:
4945 ret = 0;
4946 break;
4947 }
4948 return ret;
4949}
4950
4951
4952
4953
4954
4955
4956
4957
4958SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
4959{
4960 int ret = -EINVAL;
4961
4962 switch (policy) {
4963 case SCHED_FIFO:
4964 case SCHED_RR:
4965 ret = 1;
4966 break;
4967 case SCHED_NORMAL:
4968 case SCHED_BATCH:
4969 case SCHED_IDLE:
4970 ret = 0;
4971 }
4972 return ret;
4973}
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
4984 struct timespec __user *, interval)
4985{
4986 struct task_struct *p;
4987 unsigned int time_slice;
4988 unsigned long flags;
4989 struct rq *rq;
4990 int retval;
4991 struct timespec t;
4992
4993 if (pid < 0)
4994 return -EINVAL;
4995
4996 retval = -ESRCH;
4997 rcu_read_lock();
4998 p = find_process_by_pid(pid);
4999 if (!p)
5000 goto out_unlock;
5001
5002 retval = security_task_getscheduler(p);
5003 if (retval)
5004 goto out_unlock;
5005
5006 rq = task_rq_lock(p, &flags);
5007 time_slice = p->sched_class->get_rr_interval(rq, p);
5008 task_rq_unlock(rq, p, &flags);
5009
5010 rcu_read_unlock();
5011 jiffies_to_timespec(time_slice, &t);
5012 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
5013 return retval;
5014
5015out_unlock:
5016 rcu_read_unlock();
5017 return retval;
5018}
5019
/*
 * One display character per task state; sched_show_task() indexes it with
 * 0 for a zero state and (lowest set state bit + 1) otherwise.
 */
static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
5021
5022void sched_show_task(struct task_struct *p)
5023{
5024 unsigned long free = 0;
5025 unsigned state;
5026
5027 state = p->state ? __ffs(p->state) + 1 : 0;
5028 printk(KERN_INFO "%-15.15s %c", p->comm,
5029 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
5030#if BITS_PER_LONG == 32
5031 if (state == TASK_RUNNING)
5032 printk(KERN_CONT " running ");
5033 else
5034 printk(KERN_CONT " %08lx ", thread_saved_pc(p));
5035#else
5036 if (state == TASK_RUNNING)
5037 printk(KERN_CONT " running task ");
5038 else
5039 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
5040#endif
5041#ifdef CONFIG_DEBUG_STACK_USAGE
5042 free = stack_not_used(p);
5043#endif
5044 printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
5045 task_pid_nr(p), task_pid_nr(rcu_dereference(p->real_parent)),
5046 (unsigned long)task_thread_info(p)->flags);
5047
5048 show_stack(p, NULL);
5049}
5050
/*
 * show_state_filter - print sched_show_task() output for every thread whose
 * state matches a filter.
 * @state_filter: mask of task states to show; 0 shows every thread
 *
 * With a zero filter, all held locks are dumped at the end as well.
 */
void show_state_filter(unsigned long state_filter)
{
	struct task_struct *g, *p;

#if BITS_PER_LONG == 32
	printk(KERN_INFO
		" task PC stack pid father\n");
#else
	printk(KERN_INFO
		" task PC stack pid father\n");
#endif
	rcu_read_lock();
	do_each_thread(g, p) {
		/*
		 * Printing every thread can take long; keep the NMI watchdog
		 * quiet while doing so.
		 */
		touch_nmi_watchdog();
		if (!state_filter || (p->state & state_filter))
			sched_show_task(p);
	} while_each_thread(g, p);

	touch_all_softlockup_watchdogs();

#ifdef CONFIG_SCHED_DEBUG
	sysrq_sched_debug_show();
#endif
	rcu_read_unlock();

	/* Only the unfiltered dump also lists all held locks. */
	if (!state_filter)
		debug_show_all_locks();
}
5085
/*
 * init_idle_bootup_task - put @idle into the idle scheduling class.
 * @idle: task to convert
 */
void __cpuinit init_idle_bootup_task(struct task_struct *idle)
{
	idle->sched_class = &idle_sched_class;
}
5090
5091
5092
5093
5094
5095
5096
5097
5098
/*
 * init_idle - set up a task as the idle task of a CPU.
 * @idle: task to turn into the idle task
 * @cpu:  CPU the task will be idle task for
 *
 * Resets the task's scheduler state, restricts it to @cpu, installs it as
 * both rq->curr and rq->idle, zeroes its preempt count and switches it to
 * idle_sched_class. On SMP the task's comm becomes
 * "<INIT_TASK_COMM>/<cpu>".
 */
void __cpuinit init_idle(struct task_struct *idle, int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);

	__sched_fork(idle);
	idle->state = TASK_RUNNING;
	idle->se.exec_start = sched_clock();

	do_set_cpus_allowed(idle, cpumask_of(cpu));
	/*
	 * NOTE(review): the RCU read lock around __set_task_cpu() presumably
	 * satisfies an RCU-lockdep check inside that helper rather than
	 * protecting data here - confirm.
	 */
	rcu_read_lock();
	__set_task_cpu(idle, cpu);
	rcu_read_unlock();

	rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP)
	idle->on_cpu = 1;
#endif
	raw_spin_unlock_irqrestore(&rq->lock, flags);

	/* Start the idle task with a zero preempt count. */
	task_thread_info(idle)->preempt_count = 0;

	/* Switch to the idle class only after the state above is set up. */
	idle->sched_class = &idle_sched_class;
	ftrace_graph_init_idle_task(idle, cpu);
#if defined(CONFIG_SMP)
	/*
	 * NOTE(review): unbounded sprintf() into idle->comm; assumed to fit
	 * for every valid CPU number - confirm against the size of comm.
	 */
	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
#endif
}
5143
5144#ifdef CONFIG_SMP
5145void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
5146{
5147 if (p->sched_class && p->sched_class->set_cpus_allowed)
5148 p->sched_class->set_cpus_allowed(p, new_mask);
5149
5150 cpumask_copy(&p->cpus_allowed, new_mask);
5151 p->nr_cpus_allowed = cpumask_weight(new_mask);
5152}
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5178{
5179 unsigned long flags;
5180 struct rq *rq;
5181 unsigned int dest_cpu;
5182 int ret = 0;
5183
5184 rq = task_rq_lock(p, &flags);
5185
5186 if (cpumask_equal(&p->cpus_allowed, new_mask))
5187 goto out;
5188
5189 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
5190 ret = -EINVAL;
5191 goto out;
5192 }
5193
5194 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
5195 ret = -EINVAL;
5196 goto out;
5197 }
5198
5199 do_set_cpus_allowed(p, new_mask);
5200
5201
5202 if (cpumask_test_cpu(task_cpu(p), new_mask))
5203 goto out;
5204
5205 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
5206 if (p->on_rq) {
5207 struct migration_arg arg = { p, dest_cpu };
5208
5209 task_rq_unlock(rq, p, &flags);
5210 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
5211 tlb_migrate_finish(p->mm);
5212 return 0;
5213 }
5214out:
5215 task_rq_unlock(rq, p, &flags);
5216
5217 return ret;
5218}
5219EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5233{
5234 struct rq *rq_dest, *rq_src;
5235 int ret = 0;
5236
5237 if (unlikely(!cpu_active(dest_cpu)))
5238 return ret;
5239
5240 rq_src = cpu_rq(src_cpu);
5241 rq_dest = cpu_rq(dest_cpu);
5242
5243 raw_spin_lock(&p->pi_lock);
5244 double_rq_lock(rq_src, rq_dest);
5245
5246 if (task_cpu(p) != src_cpu)
5247 goto done;
5248
5249 if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
5250 goto fail;
5251
5252
5253
5254
5255
5256 if (p->on_rq) {
5257 dequeue_task(rq_src, p, 0);
5258 set_task_cpu(p, dest_cpu);
5259 enqueue_task(rq_dest, p, 0);
5260 check_preempt_curr(rq_dest, p, 0);
5261 }
5262done:
5263 ret = 1;
5264fail:
5265 double_rq_unlock(rq_src, rq_dest);
5266 raw_spin_unlock(&p->pi_lock);
5267 return ret;
5268}
5269
5270
5271
5272
5273
5274
5275static int migration_cpu_stop(void *data)
5276{
5277 struct migration_arg *arg = data;
5278
5279
5280
5281
5282
5283 local_irq_disable();
5284 __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
5285 local_irq_enable();
5286 return 0;
5287}
5288
5289#ifdef CONFIG_HOTPLUG_CPU
5290
5291
5292
5293
5294
5295void idle_task_exit(void)
5296{
5297 struct mm_struct *mm = current->active_mm;
5298
5299 BUG_ON(cpu_online(smp_processor_id()));
5300
5301 if (mm != &init_mm)
5302 switch_mm(mm, &init_mm, current);
5303 mmdrop(mm);
5304}
5305
5306
5307
5308
5309
5310
5311
5312
5313static void calc_load_migrate(struct rq *rq)
5314{
5315 long delta = calc_load_fold_active(rq);
5316 if (delta)
5317 atomic_long_add(delta, &calc_load_tasks);
5318}
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328static void migrate_tasks(unsigned int dead_cpu)
5329{
5330 struct rq *rq = cpu_rq(dead_cpu);
5331 struct task_struct *next, *stop = rq->stop;
5332 int dest_cpu;
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343 rq->stop = NULL;
5344
5345 for ( ; ; ) {
5346
5347
5348
5349
5350 if (rq->nr_running == 1)
5351 break;
5352
5353 next = pick_next_task(rq);
5354 BUG_ON(!next);
5355 next->sched_class->put_prev_task(rq, next);
5356
5357
5358 dest_cpu = select_fallback_rq(dead_cpu, next);
5359 raw_spin_unlock(&rq->lock);
5360
5361 __migrate_task(next, dead_cpu, dest_cpu);
5362
5363 raw_spin_lock(&rq->lock);
5364 }
5365
5366 rq->stop = stop;
5367}
5368
5369#endif
5370
5371#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
5372
5373static struct ctl_table sd_ctl_dir[] = {
5374 {
5375 .procname = "sched_domain",
5376 .mode = 0555,
5377 },
5378 {}
5379};
5380
5381static struct ctl_table sd_ctl_root[] = {
5382 {
5383 .procname = "kernel",
5384 .mode = 0555,
5385 .child = sd_ctl_dir,
5386 },
5387 {}
5388};
5389
5390static struct ctl_table *sd_alloc_ctl_entry(int n)
5391{
5392 struct ctl_table *entry =
5393 kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
5394
5395 return entry;
5396}
5397
5398static void sd_free_ctl_entry(struct ctl_table **tablep)
5399{
5400 struct ctl_table *entry;
5401
5402
5403
5404
5405
5406
5407
5408 for (entry = *tablep; entry->mode; entry++) {
5409 if (entry->child)
5410 sd_free_ctl_entry(&entry->child);
5411 if (entry->proc_handler == NULL)
5412 kfree(entry->procname);
5413 }
5414
5415 kfree(*tablep);
5416 *tablep = NULL;
5417}
5418
5419static void
5420set_table_entry(struct ctl_table *entry,
5421 const char *procname, void *data, int maxlen,
5422 umode_t mode, proc_handler *proc_handler)
5423{
5424 entry->procname = procname;
5425 entry->data = data;
5426 entry->maxlen = maxlen;
5427 entry->mode = mode;
5428 entry->proc_handler = proc_handler;
5429}
5430
5431static struct ctl_table *
5432sd_alloc_ctl_domain_table(struct sched_domain *sd)
5433{
5434 struct ctl_table *table = sd_alloc_ctl_entry(13);
5435
5436 if (table == NULL)
5437 return NULL;
5438
5439 set_table_entry(&table[0], "min_interval", &sd->min_interval,
5440 sizeof(long), 0644, proc_doulongvec_minmax);
5441 set_table_entry(&table[1], "max_interval", &sd->max_interval,
5442 sizeof(long), 0644, proc_doulongvec_minmax);
5443 set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
5444 sizeof(int), 0644, proc_dointvec_minmax);
5445 set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
5446 sizeof(int), 0644, proc_dointvec_minmax);
5447 set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
5448 sizeof(int), 0644, proc_dointvec_minmax);
5449 set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
5450 sizeof(int), 0644, proc_dointvec_minmax);
5451 set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
5452 sizeof(int), 0644, proc_dointvec_minmax);
5453 set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
5454 sizeof(int), 0644, proc_dointvec_minmax);
5455 set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
5456 sizeof(int), 0644, proc_dointvec_minmax);
5457 set_table_entry(&table[9], "cache_nice_tries",
5458 &sd->cache_nice_tries,
5459 sizeof(int), 0644, proc_dointvec_minmax);
5460 set_table_entry(&table[10], "flags", &sd->flags,
5461 sizeof(int), 0644, proc_dointvec_minmax);
5462 set_table_entry(&table[11], "name", sd->name,
5463 CORENAME_MAX_SIZE, 0444, proc_dostring);
5464
5465
5466 return table;
5467}
5468
5469static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
5470{
5471 struct ctl_table *entry, *table;
5472 struct sched_domain *sd;
5473 int domain_num = 0, i;
5474 char buf[32];
5475
5476 for_each_domain(cpu, sd)
5477 domain_num++;
5478 entry = table = sd_alloc_ctl_entry(domain_num + 1);
5479 if (table == NULL)
5480 return NULL;
5481
5482 i = 0;
5483 for_each_domain(cpu, sd) {
5484 snprintf(buf, 32, "domain%d", i);
5485 entry->procname = kstrdup(buf, GFP_KERNEL);
5486 entry->mode = 0555;
5487 entry->child = sd_alloc_ctl_domain_table(sd);
5488 entry++;
5489 i++;
5490 }
5491 return table;
5492}
5493
5494static struct ctl_table_header *sd_sysctl_header;
5495static void register_sched_domain_sysctl(void)
5496{
5497 int i, cpu_num = num_possible_cpus();
5498 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
5499 char buf[32];
5500
5501 WARN_ON(sd_ctl_dir[0].child);
5502 sd_ctl_dir[0].child = entry;
5503
5504 if (entry == NULL)
5505 return;
5506
5507 for_each_possible_cpu(i) {
5508 snprintf(buf, 32, "cpu%d", i);
5509 entry->procname = kstrdup(buf, GFP_KERNEL);
5510 entry->mode = 0555;
5511 entry->child = sd_alloc_ctl_cpu_table(i);
5512 entry++;
5513 }
5514
5515 WARN_ON(sd_sysctl_header);
5516 sd_sysctl_header = register_sysctl_table(sd_ctl_root);
5517}
5518
5519
5520static void unregister_sched_domain_sysctl(void)
5521{
5522 if (sd_sysctl_header)
5523 unregister_sysctl_table(sd_sysctl_header);
5524 sd_sysctl_header = NULL;
5525 if (sd_ctl_dir[0].child)
5526 sd_free_ctl_entry(&sd_ctl_dir[0].child);
5527}
5528#else
/* No-op variants used when CONFIG_SCHED_DEBUG and CONFIG_SYSCTL are not both set. */
static void register_sched_domain_sysctl(void) { }
static void unregister_sched_domain_sysctl(void) { }
5535#endif
5536
5537static void set_rq_online(struct rq *rq)
5538{
5539 if (!rq->online) {
5540 const struct sched_class *class;
5541
5542 cpumask_set_cpu(rq->cpu, rq->rd->online);
5543 rq->online = 1;
5544
5545 for_each_class(class) {
5546 if (class->rq_online)
5547 class->rq_online(rq);
5548 }
5549 }
5550}
5551
5552static void set_rq_offline(struct rq *rq)
5553{
5554 if (rq->online) {
5555 const struct sched_class *class;
5556
5557 for_each_class(class) {
5558 if (class->rq_offline)
5559 class->rq_offline(rq);
5560 }
5561
5562 cpumask_clear_cpu(rq->cpu, rq->rd->online);
5563 rq->online = 0;
5564 }
5565}
5566
5567
5568
5569
5570
5571static int __cpuinit
5572migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5573{
5574 int cpu = (long)hcpu;
5575 unsigned long flags;
5576 struct rq *rq = cpu_rq(cpu);
5577
5578 switch (action & ~CPU_TASKS_FROZEN) {
5579
5580 case CPU_UP_PREPARE:
5581 rq->calc_load_update = calc_load_update;
5582 break;
5583
5584 case CPU_ONLINE:
5585
5586 raw_spin_lock_irqsave(&rq->lock, flags);
5587 if (rq->rd) {
5588 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
5589
5590 set_rq_online(rq);
5591 }
5592 raw_spin_unlock_irqrestore(&rq->lock, flags);
5593 break;
5594
5595#ifdef CONFIG_HOTPLUG_CPU
5596 case CPU_DYING:
5597 sched_ttwu_pending();
5598
5599 raw_spin_lock_irqsave(&rq->lock, flags);
5600 if (rq->rd) {
5601 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
5602 set_rq_offline(rq);
5603 }
5604 migrate_tasks(cpu);
5605 BUG_ON(rq->nr_running != 1);
5606 raw_spin_unlock_irqrestore(&rq->lock, flags);
5607 break;
5608
5609 case CPU_DEAD:
5610 calc_load_migrate(rq);
5611 break;
5612#endif
5613 }
5614
5615 update_max_interval();
5616
5617 return NOTIFY_OK;
5618}
5619
5620
5621
5622
5623
5624
5625static struct notifier_block __cpuinitdata migration_notifier = {
5626 .notifier_call = migration_call,
5627 .priority = CPU_PRI_MIGRATION,
5628};
5629
5630static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
5631 unsigned long action, void *hcpu)
5632{
5633 switch (action & ~CPU_TASKS_FROZEN) {
5634 case CPU_STARTING:
5635 case CPU_DOWN_FAILED:
5636 set_cpu_active((long)hcpu, true);
5637 return NOTIFY_OK;
5638 default:
5639 return NOTIFY_DONE;
5640 }
5641}
5642
5643static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
5644 unsigned long action, void *hcpu)
5645{
5646 switch (action & ~CPU_TASKS_FROZEN) {
5647 case CPU_DOWN_PREPARE:
5648 set_cpu_active((long)hcpu, false);
5649 return NOTIFY_OK;
5650 default:
5651 return NOTIFY_DONE;
5652 }
5653}
5654
5655static int __init migration_init(void)
5656{
5657 void *cpu = (void *)(long)smp_processor_id();
5658 int err;
5659
5660
5661 err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5662 BUG_ON(err == NOTIFY_BAD);
5663 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5664 register_cpu_notifier(&migration_notifier);
5665
5666
5667 cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
5668 cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
5669
5670 return 0;
5671}
5672early_initcall(migration_init);
5673#endif
5674
5675#ifdef CONFIG_SMP
5676
5677static cpumask_var_t sched_domains_tmpmask;
5678
5679#ifdef CONFIG_SCHED_DEBUG
5680
5681static __read_mostly int sched_debug_enabled;
5682
5683static int __init sched_debug_setup(char *str)
5684{
5685 sched_debug_enabled = 1;
5686
5687 return 0;
5688}
5689early_param("sched_debug", sched_debug_setup);
5690
5691static inline bool sched_debug(void)
5692{
5693 return sched_debug_enabled;
5694}
5695
5696static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5697 struct cpumask *groupmask)
5698{
5699 struct sched_group *group = sd->groups;
5700 char str[256];
5701
5702 cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
5703 cpumask_clear(groupmask);
5704
5705 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
5706
5707 if (!(sd->flags & SD_LOAD_BALANCE)) {
5708 printk("does not load-balance\n");
5709 if (sd->parent)
5710 printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
5711 " has parent");
5712 return -1;
5713 }
5714
5715 printk(KERN_CONT "span %s level %s\n", str, sd->name);
5716
5717 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
5718 printk(KERN_ERR "ERROR: domain->span does not contain "
5719 "CPU%d\n", cpu);
5720 }
5721 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
5722 printk(KERN_ERR "ERROR: domain->groups does not contain"
5723 " CPU%d\n", cpu);
5724 }
5725
5726 printk(KERN_DEBUG "%*s groups:", level + 1, "");
5727 do {
5728 if (!group) {
5729 printk("\n");
5730 printk(KERN_ERR "ERROR: group is NULL\n");
5731 break;
5732 }
5733
5734
5735
5736
5737
5738
5739 if (!group->sgp->power_orig) {
5740 printk(KERN_CONT "\n");
5741 printk(KERN_ERR "ERROR: domain->cpu_power not "
5742 "set\n");
5743 break;
5744 }
5745
5746 if (!cpumask_weight(sched_group_cpus(group))) {
5747 printk(KERN_CONT "\n");
5748 printk(KERN_ERR "ERROR: empty group\n");
5749 break;
5750 }
5751
5752 if (!(sd->flags & SD_OVERLAP) &&
5753 cpumask_intersects(groupmask, sched_group_cpus(group))) {
5754 printk(KERN_CONT "\n");
5755 printk(KERN_ERR "ERROR: repeated CPUs\n");
5756 break;
5757 }
5758
5759 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
5760
5761 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
5762
5763 printk(KERN_CONT " %s", str);
5764 if (group->sgp->power != SCHED_POWER_SCALE) {
5765 printk(KERN_CONT " (cpu_power = %d)",
5766 group->sgp->power);
5767 }
5768
5769 group = group->next;
5770 } while (group != sd->groups);
5771 printk(KERN_CONT "\n");
5772
5773 if (!cpumask_equal(sched_domain_span(sd), groupmask))
5774 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
5775
5776 if (sd->parent &&
5777 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
5778 printk(KERN_ERR "ERROR: parent span is not a superset "
5779 "of domain->span\n");
5780 return 0;
5781}
5782
5783static void sched_domain_debug(struct sched_domain *sd, int cpu)
5784{
5785 int level = 0;
5786
5787 if (!sched_debug_enabled)
5788 return;
5789
5790 if (!sd) {
5791 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
5792 return;
5793 }
5794
5795 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
5796
5797 for (;;) {
5798 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
5799 break;
5800 level++;
5801 sd = sd->parent;
5802 if (!sd)
5803 break;
5804 }
5805}
5806#else
5807# define sched_domain_debug(sd, cpu) do { } while (0)
5808static inline bool sched_debug(void)
5809{
5810 return false;
5811}
5812#endif
5813
5814static int sd_degenerate(struct sched_domain *sd)
5815{
5816 if (cpumask_weight(sched_domain_span(sd)) == 1)
5817 return 1;
5818
5819
5820 if (sd->flags & (SD_LOAD_BALANCE |
5821 SD_BALANCE_NEWIDLE |
5822 SD_BALANCE_FORK |
5823 SD_BALANCE_EXEC |
5824 SD_SHARE_CPUPOWER |
5825 SD_SHARE_PKG_RESOURCES)) {
5826 if (sd->groups != sd->groups->next)
5827 return 0;
5828 }
5829
5830
5831 if (sd->flags & (SD_WAKE_AFFINE))
5832 return 0;
5833
5834 return 1;
5835}
5836
5837static int
5838sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5839{
5840 unsigned long cflags = sd->flags, pflags = parent->flags;
5841
5842 if (sd_degenerate(parent))
5843 return 1;
5844
5845 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
5846 return 0;
5847
5848
5849 if (parent->groups == parent->groups->next) {
5850 pflags &= ~(SD_LOAD_BALANCE |
5851 SD_BALANCE_NEWIDLE |
5852 SD_BALANCE_FORK |
5853 SD_BALANCE_EXEC |
5854 SD_SHARE_CPUPOWER |
5855 SD_SHARE_PKG_RESOURCES);
5856 if (nr_node_ids == 1)
5857 pflags &= ~SD_SERIALIZE;
5858 }
5859 if (~cflags & pflags)
5860 return 0;
5861
5862 return 1;
5863}
5864
5865static void free_rootdomain(struct rcu_head *rcu)
5866{
5867 struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
5868
5869 cpupri_cleanup(&rd->cpupri);
5870 free_cpumask_var(rd->rto_mask);
5871 free_cpumask_var(rd->online);
5872 free_cpumask_var(rd->span);
5873 kfree(rd);
5874}
5875
5876static void rq_attach_root(struct rq *rq, struct root_domain *rd)
5877{
5878 struct root_domain *old_rd = NULL;
5879 unsigned long flags;
5880
5881 raw_spin_lock_irqsave(&rq->lock, flags);
5882
5883 if (rq->rd) {
5884 old_rd = rq->rd;
5885
5886 if (cpumask_test_cpu(rq->cpu, old_rd->online))
5887 set_rq_offline(rq);
5888
5889 cpumask_clear_cpu(rq->cpu, old_rd->span);
5890
5891
5892
5893
5894
5895
5896 if (!atomic_dec_and_test(&old_rd->refcount))
5897 old_rd = NULL;
5898 }
5899
5900 atomic_inc(&rd->refcount);
5901 rq->rd = rd;
5902
5903 cpumask_set_cpu(rq->cpu, rd->span);
5904 if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
5905 set_rq_online(rq);
5906
5907 raw_spin_unlock_irqrestore(&rq->lock, flags);
5908
5909 if (old_rd)
5910 call_rcu_sched(&old_rd->rcu, free_rootdomain);
5911}
5912
5913static int init_rootdomain(struct root_domain *rd)
5914{
5915 memset(rd, 0, sizeof(*rd));
5916
5917 if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
5918 goto out;
5919 if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
5920 goto free_span;
5921 if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
5922 goto free_online;
5923
5924 if (cpupri_init(&rd->cpupri) != 0)
5925 goto free_rto_mask;
5926 return 0;
5927
5928free_rto_mask:
5929 free_cpumask_var(rd->rto_mask);
5930free_online:
5931 free_cpumask_var(rd->online);
5932free_span:
5933 free_cpumask_var(rd->span);
5934out:
5935 return -ENOMEM;
5936}
5937
5938
5939
5940
5941
5942struct root_domain def_root_domain;
5943
5944static void init_defrootdomain(void)
5945{
5946 init_rootdomain(&def_root_domain);
5947
5948 atomic_set(&def_root_domain.refcount, 1);
5949}
5950
5951static struct root_domain *alloc_rootdomain(void)
5952{
5953 struct root_domain *rd;
5954
5955 rd = kmalloc(sizeof(*rd), GFP_KERNEL);
5956 if (!rd)
5957 return NULL;
5958
5959 if (init_rootdomain(rd) != 0) {
5960 kfree(rd);
5961 return NULL;
5962 }
5963
5964 return rd;
5965}
5966
5967static void free_sched_groups(struct sched_group *sg, int free_sgp)
5968{
5969 struct sched_group *tmp, *first;
5970
5971 if (!sg)
5972 return;
5973
5974