1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/mm.h>
30#include <linux/module.h>
31#include <linux/nmi.h>
32#include <linux/init.h>
33#include <linux/uaccess.h>
34#include <linux/highmem.h>
35#include <asm/mmu_context.h>
36#include <linux/interrupt.h>
37#include <linux/capability.h>
38#include <linux/completion.h>
39#include <linux/kernel_stat.h>
40#include <linux/debug_locks.h>
41#include <linux/perf_event.h>
42#include <linux/security.h>
43#include <linux/notifier.h>
44#include <linux/profile.h>
45#include <linux/freezer.h>
46#include <linux/vmalloc.h>
47#include <linux/blkdev.h>
48#include <linux/delay.h>
49#include <linux/pid_namespace.h>
50#include <linux/smp.h>
51#include <linux/threads.h>
52#include <linux/timer.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h>
55#include <linux/cpuset.h>
56#include <linux/percpu.h>
57#include <linux/proc_fs.h>
58#include <linux/seq_file.h>
59#include <linux/stop_machine.h>
60#include <linux/sysctl.h>
61#include <linux/syscalls.h>
62#include <linux/times.h>
63#include <linux/tsacct_kern.h>
64#include <linux/kprobes.h>
65#include <linux/delayacct.h>
66#include <linux/unistd.h>
67#include <linux/pagemap.h>
68#include <linux/hrtimer.h>
69#include <linux/tick.h>
70#include <linux/debugfs.h>
71#include <linux/ctype.h>
72#include <linux/ftrace.h>
73#include <linux/slab.h>
74
75#include <asm/tlb.h>
76#include <asm/irq_regs.h>
77#include <asm/mutex.h>
78#ifdef CONFIG_PARAVIRT
79#include <asm/paravirt.h>
80#endif
81
82#include "sched_cpupri.h"
83#include "workqueue_sched.h"
84#include "sched_autogroup.h"
85
86#define CREATE_TRACE_POINTS
87#include <trace/events/sched.h>
88
89
90
91
92
93
/*
 * Conversions between a nice value and a static priority.  The fixed
 * offset of 20 places nice -20 at MAX_RT_PRIO.
 */
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)

/*
 * "User priority": a priority rebased so that MAX_RT_PRIO becomes 0.
 * MAX_USER_PRIO is thus MAX_PRIO - MAX_RT_PRIO.
 */
#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))

/*
 * Nanoseconds to jiffies.  TIME is truncated to unsigned long before it
 * is divided by the number of nanoseconds per jiffy.
 */
#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))

/* Load weight (and its shift) aliased to the generic load scale. */
#define NICE_0_LOAD		SCHED_LOAD_SCALE
#define NICE_0_SHIFT		SCHED_LOAD_SHIFT

/* Default timeslice: 100 msec expressed in jiffies. */
#define DEF_TIMESLICE		(100 * HZ / 1000)

/* All-ones u64 used as the "unlimited runtime" marker. */
#define RUNTIME_INF	((u64)~0ULL)
127
/*
 * rt_policy - tell whether a scheduling policy is a real-time one.
 * @policy: scheduling policy constant to classify
 *
 * Returns 1 for SCHED_FIFO and SCHED_RR, 0 for anything else.
 */
static inline int rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}
134
135static inline int task_has_rt_policy(struct task_struct *p)
136{
137 return rt_policy(p->policy);
138}
139
140
141
142
143struct rt_prio_array {
144 DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1);
145 struct list_head queue[MAX_RT_PRIO];
146};
147
148struct rt_bandwidth {
149
150 raw_spinlock_t rt_runtime_lock;
151 ktime_t rt_period;
152 u64 rt_runtime;
153 struct hrtimer rt_period_timer;
154};
155
156static struct rt_bandwidth def_rt_bandwidth;
157
158static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
159
160static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
161{
162 struct rt_bandwidth *rt_b =
163 container_of(timer, struct rt_bandwidth, rt_period_timer);
164 ktime_t now;
165 int overrun;
166 int idle = 0;
167
168 for (;;) {
169 now = hrtimer_cb_get_time(timer);
170 overrun = hrtimer_forward(timer, now, rt_b->rt_period);
171
172 if (!overrun)
173 break;
174
175 idle = do_sched_rt_period_timer(rt_b, overrun);
176 }
177
178 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
179}
180
181static
182void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
183{
184 rt_b->rt_period = ns_to_ktime(period);
185 rt_b->rt_runtime = runtime;
186
187 raw_spin_lock_init(&rt_b->rt_runtime_lock);
188
189 hrtimer_init(&rt_b->rt_period_timer,
190 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
191 rt_b->rt_period_timer.function = sched_rt_period_timer;
192}
193
194static inline int rt_bandwidth_enabled(void)
195{
196 return sysctl_sched_rt_runtime >= 0;
197}
198
199static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
200{
201 ktime_t now;
202
203 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
204 return;
205
206 if (hrtimer_active(&rt_b->rt_period_timer))
207 return;
208
209 raw_spin_lock(&rt_b->rt_runtime_lock);
210 for (;;) {
211 unsigned long delta;
212 ktime_t soft, hard;
213
214 if (hrtimer_active(&rt_b->rt_period_timer))
215 break;
216
217 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
218 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
219
220 soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
221 hard = hrtimer_get_expires(&rt_b->rt_period_timer);
222 delta = ktime_to_ns(ktime_sub(hard, soft));
223 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
224 HRTIMER_MODE_ABS_PINNED, 0);
225 }
226 raw_spin_unlock(&rt_b->rt_runtime_lock);
227}
228
229#ifdef CONFIG_RT_GROUP_SCHED
230static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
231{
232 hrtimer_cancel(&rt_b->rt_period_timer);
233}
234#endif
235
236
237
238
239
/*
 * Mutex referenced by rcu_dereference_check_sched_domain() as a valid
 * protection for sched-domain pointers.
 */
static DEFINE_MUTEX(sched_domains_mutex);
241
242#ifdef CONFIG_CGROUP_SCHED
243
244#include <linux/cgroup.h>
245
/* Forward declaration; the full definition follows further down. */
struct cfs_rq;

/* List head for task groups (entries presumably link via task_group::list). */
static LIST_HEAD(task_groups);
249
250
251struct task_group {
252 struct cgroup_subsys_state css;
253
254#ifdef CONFIG_FAIR_GROUP_SCHED
255
256 struct sched_entity **se;
257
258 struct cfs_rq **cfs_rq;
259 unsigned long shares;
260
261 atomic_t load_weight;
262#endif
263
264#ifdef CONFIG_RT_GROUP_SCHED
265 struct sched_rt_entity **rt_se;
266 struct rt_rq **rt_rq;
267
268 struct rt_bandwidth rt_bandwidth;
269#endif
270
271 struct rcu_head rcu;
272 struct list_head list;
273
274 struct task_group *parent;
275 struct list_head siblings;
276 struct list_head children;
277
278#ifdef CONFIG_SCHED_AUTOGROUP
279 struct autogroup *autogroup;
280#endif
281};
282
283
/* Spinlock for task-group bookkeeping (users are outside this excerpt). */
static DEFINE_SPINLOCK(task_group_lock);
285
286#ifdef CONFIG_FAIR_GROUP_SCHED
287
288# define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
289
290
291
292
293
294
295
296
297
298#define MIN_SHARES (1UL << 1)
299#define MAX_SHARES (1UL << 18)
300
301static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
302#endif
303
304
305
306
307struct task_group root_task_group;
308
309#endif
310
311
312struct cfs_rq {
313 struct load_weight load;
314 unsigned long nr_running;
315
316 u64 exec_clock;
317 u64 min_vruntime;
318#ifndef CONFIG_64BIT
319 u64 min_vruntime_copy;
320#endif
321
322 struct rb_root tasks_timeline;
323 struct rb_node *rb_leftmost;
324
325 struct list_head tasks;
326 struct list_head *balance_iterator;
327
328
329
330
331
332 struct sched_entity *curr, *next, *last, *skip;
333
334#ifdef CONFIG_SCHED_DEBUG
335 unsigned int nr_spread_over;
336#endif
337
338#ifdef CONFIG_FAIR_GROUP_SCHED
339 struct rq *rq;
340
341
342
343
344
345
346
347
348
349 int on_list;
350 struct list_head leaf_cfs_rq_list;
351 struct task_group *tg;
352
353#ifdef CONFIG_SMP
354
355
356
357 unsigned long task_weight;
358
359
360
361
362
363
364
365 unsigned long h_load;
366
367
368
369
370
371
372
373
374 u64 load_avg;
375 u64 load_period;
376 u64 load_stamp, load_last, load_unacc_exec_time;
377
378 unsigned long load_contribution;
379#endif
380#endif
381};
382
383
384struct rt_rq {
385 struct rt_prio_array active;
386 unsigned long rt_nr_running;
387#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
388 struct {
389 int curr;
390#ifdef CONFIG_SMP
391 int next;
392#endif
393 } highest_prio;
394#endif
395#ifdef CONFIG_SMP
396 unsigned long rt_nr_migratory;
397 unsigned long rt_nr_total;
398 int overloaded;
399 struct plist_head pushable_tasks;
400#endif
401 int rt_throttled;
402 u64 rt_time;
403 u64 rt_runtime;
404
405 raw_spinlock_t rt_runtime_lock;
406
407#ifdef CONFIG_RT_GROUP_SCHED
408 unsigned long rt_nr_boosted;
409
410 struct rq *rq;
411 struct list_head leaf_rt_rq_list;
412 struct task_group *tg;
413#endif
414};
415
416#ifdef CONFIG_SMP
417
418
419
420
421
422
423
424
425
426struct root_domain {
427 atomic_t refcount;
428 atomic_t rto_count;
429 struct rcu_head rcu;
430 cpumask_var_t span;
431 cpumask_var_t online;
432
433
434
435
436
437 cpumask_var_t rto_mask;
438 struct cpupri cpupri;
439};
440
441
442
443
444
445static struct root_domain def_root_domain;
446
447#endif
448
449
450
451
452
453
454
455
456struct rq {
457
458 raw_spinlock_t lock;
459
460
461
462
463
464 unsigned long nr_running;
465 #define CPU_LOAD_IDX_MAX 5
466 unsigned long cpu_load[CPU_LOAD_IDX_MAX];
467 unsigned long last_load_update_tick;
468#ifdef CONFIG_NO_HZ
469 u64 nohz_stamp;
470 unsigned char nohz_balance_kick;
471#endif
472 int skip_clock_update;
473
474
475 struct load_weight load;
476 unsigned long nr_load_updates;
477 u64 nr_switches;
478
479 struct cfs_rq cfs;
480 struct rt_rq rt;
481
482#ifdef CONFIG_FAIR_GROUP_SCHED
483
484 struct list_head leaf_cfs_rq_list;
485#endif
486#ifdef CONFIG_RT_GROUP_SCHED
487 struct list_head leaf_rt_rq_list;
488#endif
489
490
491
492
493
494
495
496 unsigned long nr_uninterruptible;
497
498 struct task_struct *curr, *idle, *stop;
499 unsigned long next_balance;
500 struct mm_struct *prev_mm;
501
502 u64 clock;
503 u64 clock_task;
504
505 atomic_t nr_iowait;
506
507#ifdef CONFIG_SMP
508 struct root_domain *rd;
509 struct sched_domain *sd;
510
511 unsigned long cpu_power;
512
513 unsigned char idle_at_tick;
514
515 int post_schedule;
516 int active_balance;
517 int push_cpu;
518 struct cpu_stop_work active_balance_work;
519
520 int cpu;
521 int online;
522
523 unsigned long avg_load_per_task;
524
525 u64 rt_avg;
526 u64 age_stamp;
527 u64 idle_stamp;
528 u64 avg_idle;
529#endif
530
531#ifdef CONFIG_IRQ_TIME_ACCOUNTING
532 u64 prev_irq_time;
533#endif
534#ifdef CONFIG_PARAVIRT
535 u64 prev_steal_time;
536#endif
537#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
538 u64 prev_steal_time_rq;
539#endif
540
541
542 unsigned long calc_load_update;
543 long calc_load_active;
544
545#ifdef CONFIG_SCHED_HRTICK
546#ifdef CONFIG_SMP
547 int hrtick_csd_pending;
548 struct call_single_data hrtick_csd;
549#endif
550 struct hrtimer hrtick_timer;
551#endif
552
553#ifdef CONFIG_SCHEDSTATS
554
555 struct sched_info rq_sched_info;
556 unsigned long long rq_cpu_time;
557
558
559
560 unsigned int yld_count;
561
562
563 unsigned int sched_switch;
564 unsigned int sched_count;
565 unsigned int sched_goidle;
566
567
568 unsigned int ttwu_count;
569 unsigned int ttwu_local;
570#endif
571
572#ifdef CONFIG_SMP
573 struct task_struct *wake_list;
574#endif
575};
576
577static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
578
579
580static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
581
/*
 * cpu_of - CPU number a runqueue belongs to.
 * @rq: runqueue to query
 *
 * Returns rq->cpu on SMP builds and 0 otherwise.
 */
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}
590
/*
 * Dereference a sched-domain pointer; holding sched_domains_mutex is
 * passed to rcu_dereference_check() as an additional valid condition.
 */
#define rcu_dereference_check_sched_domain(p) \
	rcu_dereference_check((p), \
			      lockdep_is_held(&sched_domains_mutex))

/*
 * Walk the sched-domain chain of @cpu from rq->sd upwards through
 * ->parent until it ends.
 */
#define for_each_domain(cpu, __sd) \
	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)

/* Runqueue accessors. */
#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
#define this_rq()		(&__get_cpu_var(runqueues))
#define task_rq(p)		cpu_rq(task_cpu(p))
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
#define raw_rq()		(&__raw_get_cpu_var(runqueues))
610
611#ifdef CONFIG_CGROUP_SCHED
612
613
614
615
616
617
618
619
620
621static inline struct task_group *task_group(struct task_struct *p)
622{
623 struct task_group *tg;
624 struct cgroup_subsys_state *css;
625
626 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
627 lockdep_is_held(&p->pi_lock) ||
628 lockdep_is_held(&task_rq(p)->lock));
629 tg = container_of(css, struct task_group, css);
630
631 return autogroup_task_group(p, tg);
632}
633
634
/*
 * set_task_rq - point a task's scheduling entities at the per-CPU group
 * structures of its task group.
 * @p:   task being updated
 * @cpu: index into the group's per-CPU arrays
 */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
#ifdef CONFIG_FAIR_GROUP_SCHED
	/* fair class: runqueue and parent entity of the group on @cpu */
	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
	p->se.parent = task_group(p)->se[cpu];
#endif

#ifdef CONFIG_RT_GROUP_SCHED
	/* RT class: same for the rt runqueue and rt entity */
	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
	p->rt.parent = task_group(p)->rt_se[cpu];
#endif
}
647
648#else
649
650static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
651static inline struct task_group *task_group(struct task_struct *p)
652{
653 return NULL;
654}
655
656#endif
657
658static void update_rq_clock_task(struct rq *rq, s64 delta);
659
660static void update_rq_clock(struct rq *rq)
661{
662 s64 delta;
663
664 if (rq->skip_clock_update > 0)
665 return;
666
667 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
668 rq->clock += delta;
669 update_rq_clock_task(rq, delta);
670}
671
672
673
674
/*
 * Tunables marked const_debug are writable (__read_mostly) on
 * CONFIG_SCHED_DEBUG builds and "static const" otherwise.
 */
#ifdef CONFIG_SCHED_DEBUG
# define const_debug __read_mostly
#else
# define const_debug static const
#endif
680
681
682
683
684
685
686
687
688int runqueue_is_locked(int cpu)
689{
690 return raw_spin_is_locked(&cpu_rq(cpu)->lock);
691}
692
693
694
695
696
697#define SCHED_FEAT(name, enabled) \
698 __SCHED_FEAT_##name ,
699
700enum {
701#include "sched_features.h"
702};
703
704#undef SCHED_FEAT
705
706#define SCHED_FEAT(name, enabled) \
707 (1UL << __SCHED_FEAT_##name) * enabled |
708
709const_debug unsigned int sysctl_sched_features =
710#include "sched_features.h"
711 0;
712
713#undef SCHED_FEAT
714
715#ifdef CONFIG_SCHED_DEBUG
716#define SCHED_FEAT(name, enabled) \
717 #name ,
718
719static __read_mostly char *sched_feat_names[] = {
720#include "sched_features.h"
721 NULL
722};
723
724#undef SCHED_FEAT
725
726static int sched_feat_show(struct seq_file *m, void *v)
727{
728 int i;
729
730 for (i = 0; sched_feat_names[i]; i++) {
731 if (!(sysctl_sched_features & (1UL << i)))
732 seq_puts(m, "NO_");
733 seq_printf(m, "%s ", sched_feat_names[i]);
734 }
735 seq_puts(m, "\n");
736
737 return 0;
738}
739
740static ssize_t
741sched_feat_write(struct file *filp, const char __user *ubuf,
742 size_t cnt, loff_t *ppos)
743{
744 char buf[64];
745 char *cmp;
746 int neg = 0;
747 int i;
748
749 if (cnt > 63)
750 cnt = 63;
751
752 if (copy_from_user(&buf, ubuf, cnt))
753 return -EFAULT;
754
755 buf[cnt] = 0;
756 cmp = strstrip(buf);
757
758 if (strncmp(cmp, "NO_", 3) == 0) {
759 neg = 1;
760 cmp += 3;
761 }
762
763 for (i = 0; sched_feat_names[i]; i++) {
764 if (strcmp(cmp, sched_feat_names[i]) == 0) {
765 if (neg)
766 sysctl_sched_features &= ~(1UL << i);
767 else
768 sysctl_sched_features |= (1UL << i);
769 break;
770 }
771 }
772
773 if (!sched_feat_names[i])
774 return -EINVAL;
775
776 *ppos += cnt;
777
778 return cnt;
779}
780
781static int sched_feat_open(struct inode *inode, struct file *filp)
782{
783 return single_open(filp, sched_feat_show, NULL);
784}
785
786static const struct file_operations sched_feat_fops = {
787 .open = sched_feat_open,
788 .write = sched_feat_write,
789 .read = seq_read,
790 .llseek = seq_lseek,
791 .release = single_release,
792};
793
794static __init int sched_init_debug(void)
795{
796 debugfs_create_file("sched_features", 0644, NULL, NULL,
797 &sched_feat_fops);
798
799 return 0;
800}
801late_initcall(sched_init_debug);
802
803#endif
804
805#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
806
807
808
809
810
811const_debug unsigned int sysctl_sched_nr_migrate = 32;
812
813
814
815
816
817
818
819const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
820
821
822
823
824
825unsigned int sysctl_sched_rt_period = 1000000;
826
827static __read_mostly int scheduler_running;
828
829
830
831
832
833int sysctl_sched_rt_runtime = 950000;
834
835static inline u64 global_rt_period(void)
836{
837 return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
838}
839
840static inline u64 global_rt_runtime(void)
841{
842 if (sysctl_sched_rt_runtime < 0)
843 return RUNTIME_INF;
844
845 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
846}
847
/* No-op defaults for architectures that do not provide these hooks. */
#ifndef prepare_arch_switch
# define prepare_arch_switch(next)	do { } while (0)
#endif
#ifndef finish_arch_switch
# define finish_arch_switch(prev)	do { } while (0)
#endif
854
855static inline int task_current(struct rq *rq, struct task_struct *p)
856{
857 return rq->curr == p;
858}
859
/*
 * task_running - is @p executing on a CPU?
 *
 * SMP builds report p->on_cpu; uniprocessor builds fall back to
 * task_current().
 */
static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
	return p->on_cpu;
#else
	return task_current(rq, p);
#endif
}
868
869#ifndef __ARCH_WANT_UNLOCKED_CTXSW
/*
 * prepare_lock_switch - context-switch preparation when the rq lock
 * stays held across the switch.
 * @rq:   runqueue (unused here)
 * @next: task about to run
 */
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
	/* Mark @next as running on a CPU before the switch happens. */
	next->on_cpu = 1;
#endif
}
881
882static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
883{
884#ifdef CONFIG_SMP
885
886
887
888
889
890 smp_wmb();
891 prev->on_cpu = 0;
892#endif
893#ifdef CONFIG_DEBUG_SPINLOCK
894
895 rq->lock.owner = current;
896#endif
897
898
899
900
901
902 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
903
904 raw_spin_unlock_irq(&rq->lock);
905}
906
907#else
908static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
909{
910#ifdef CONFIG_SMP
911
912
913
914
915
916 next->on_cpu = 1;
917#endif
918#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
919 raw_spin_unlock_irq(&rq->lock);
920#else
921 raw_spin_unlock(&rq->lock);
922#endif
923}
924
/*
 * finish_lock_switch - complete a context switch done with the rq lock
 * dropped.
 * @rq:   runqueue (unused here)
 * @prev: task that was switched out
 */
static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
	/* Order all earlier stores before clearing ->on_cpu. */
	smp_wmb();
	prev->on_cpu = 0;
#endif
#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	/* counterpart of the plain raw_spin_unlock() in prepare_lock_switch() */
	local_irq_enable();
#endif
}
940#endif
941
942
943
944
945static inline struct rq *__task_rq_lock(struct task_struct *p)
946 __acquires(rq->lock)
947{
948 struct rq *rq;
949
950 lockdep_assert_held(&p->pi_lock);
951
952 for (;;) {
953 rq = task_rq(p);
954 raw_spin_lock(&rq->lock);
955 if (likely(rq == task_rq(p)))
956 return rq;
957 raw_spin_unlock(&rq->lock);
958 }
959}
960
961
962
963
964static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
965 __acquires(p->pi_lock)
966 __acquires(rq->lock)
967{
968 struct rq *rq;
969
970 for (;;) {
971 raw_spin_lock_irqsave(&p->pi_lock, *flags);
972 rq = task_rq(p);
973 raw_spin_lock(&rq->lock);
974 if (likely(rq == task_rq(p)))
975 return rq;
976 raw_spin_unlock(&rq->lock);
977 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
978 }
979}
980
981static void __task_rq_unlock(struct rq *rq)
982 __releases(rq->lock)
983{
984 raw_spin_unlock(&rq->lock);
985}
986
987static inline void
988task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
989 __releases(rq->lock)
990 __releases(p->pi_lock)
991{
992 raw_spin_unlock(&rq->lock);
993 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
994}
995
996
997
998
999static struct rq *this_rq_lock(void)
1000 __acquires(rq->lock)
1001{
1002 struct rq *rq;
1003
1004 local_irq_disable();
1005 rq = this_rq();
1006 raw_spin_lock(&rq->lock);
1007
1008 return rq;
1009}
1010
1011#ifdef CONFIG_SCHED_HRTICK
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028static inline int hrtick_enabled(struct rq *rq)
1029{
1030 if (!sched_feat(HRTICK))
1031 return 0;
1032 if (!cpu_active(cpu_of(rq)))
1033 return 0;
1034 return hrtimer_is_hres_active(&rq->hrtick_timer);
1035}
1036
1037static void hrtick_clear(struct rq *rq)
1038{
1039 if (hrtimer_active(&rq->hrtick_timer))
1040 hrtimer_cancel(&rq->hrtick_timer);
1041}
1042
1043
1044
1045
1046
1047static enum hrtimer_restart hrtick(struct hrtimer *timer)
1048{
1049 struct rq *rq = container_of(timer, struct rq, hrtick_timer);
1050
1051 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1052
1053 raw_spin_lock(&rq->lock);
1054 update_rq_clock(rq);
1055 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1056 raw_spin_unlock(&rq->lock);
1057
1058 return HRTIMER_NORESTART;
1059}
1060
1061#ifdef CONFIG_SMP
1062
1063
1064
1065static void __hrtick_start(void *arg)
1066{
1067 struct rq *rq = arg;
1068
1069 raw_spin_lock(&rq->lock);
1070 hrtimer_restart(&rq->hrtick_timer);
1071 rq->hrtick_csd_pending = 0;
1072 raw_spin_unlock(&rq->lock);
1073}
1074
1075
1076
1077
1078
1079
1080static void hrtick_start(struct rq *rq, u64 delay)
1081{
1082 struct hrtimer *timer = &rq->hrtick_timer;
1083 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
1084
1085 hrtimer_set_expires(timer, time);
1086
1087 if (rq == this_rq()) {
1088 hrtimer_restart(timer);
1089 } else if (!rq->hrtick_csd_pending) {
1090 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
1091 rq->hrtick_csd_pending = 1;
1092 }
1093}
1094
1095static int
1096hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
1097{
1098 int cpu = (int)(long)hcpu;
1099
1100 switch (action) {
1101 case CPU_UP_CANCELED:
1102 case CPU_UP_CANCELED_FROZEN:
1103 case CPU_DOWN_PREPARE:
1104 case CPU_DOWN_PREPARE_FROZEN:
1105 case CPU_DEAD:
1106 case CPU_DEAD_FROZEN:
1107 hrtick_clear(cpu_rq(cpu));
1108 return NOTIFY_OK;
1109 }
1110
1111 return NOTIFY_DONE;
1112}
1113
1114static __init void init_hrtick(void)
1115{
1116 hotcpu_notifier(hotplug_hrtick, 0);
1117}
1118#else
1119
1120
1121
1122
1123
1124static void hrtick_start(struct rq *rq, u64 delay)
1125{
1126 __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
1127 HRTIMER_MODE_REL_PINNED, 0);
1128}
1129
/* init_hrtick - nothing to register on uniprocessor builds. */
static inline void init_hrtick(void)
{
}
1133#endif
1134
1135static void init_rq_hrtick(struct rq *rq)
1136{
1137#ifdef CONFIG_SMP
1138 rq->hrtick_csd_pending = 0;
1139
1140 rq->hrtick_csd.flags = 0;
1141 rq->hrtick_csd.func = __hrtick_start;
1142 rq->hrtick_csd.info = rq;
1143#endif
1144
1145 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1146 rq->hrtick_timer.function = hrtick;
1147}
1148#else
/* No-op stubs used when CONFIG_SCHED_HRTICK is disabled. */
static inline void hrtick_clear(struct rq *rq)
{
}

static inline void init_rq_hrtick(struct rq *rq)
{
}

static inline void init_hrtick(void)
{
}
1160#endif
1161
1162
1163
1164
1165
1166
1167
1168
1169#ifdef CONFIG_SMP
1170
/*
 * Default test for the TIF_POLLING_NRFLAG thread flag; when it is set
 * the callers below skip sending a reschedule IPI.
 */
#ifndef tsk_is_polling
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif
1174
1175static void resched_task(struct task_struct *p)
1176{
1177 int cpu;
1178
1179 assert_raw_spin_locked(&task_rq(p)->lock);
1180
1181 if (test_tsk_need_resched(p))
1182 return;
1183
1184 set_tsk_need_resched(p);
1185
1186 cpu = task_cpu(p);
1187 if (cpu == smp_processor_id())
1188 return;
1189
1190
1191 smp_mb();
1192 if (!tsk_is_polling(p))
1193 smp_send_reschedule(cpu);
1194}
1195
1196static void resched_cpu(int cpu)
1197{
1198 struct rq *rq = cpu_rq(cpu);
1199 unsigned long flags;
1200
1201 if (!raw_spin_trylock_irqsave(&rq->lock, flags))
1202 return;
1203 resched_task(cpu_curr(cpu));
1204 raw_spin_unlock_irqrestore(&rq->lock, flags);
1205}
1206
1207#ifdef CONFIG_NO_HZ
1208
1209
1210
1211
1212
1213
1214
1215
1216int get_nohz_timer_target(void)
1217{
1218 int cpu = smp_processor_id();
1219 int i;
1220 struct sched_domain *sd;
1221
1222 rcu_read_lock();
1223 for_each_domain(cpu, sd) {
1224 for_each_cpu(i, sched_domain_span(sd)) {
1225 if (!idle_cpu(i)) {
1226 cpu = i;
1227 goto unlock;
1228 }
1229 }
1230 }
1231unlock:
1232 rcu_read_unlock();
1233 return cpu;
1234}
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245void wake_up_idle_cpu(int cpu)
1246{
1247 struct rq *rq = cpu_rq(cpu);
1248
1249 if (cpu == smp_processor_id())
1250 return;
1251
1252
1253
1254
1255
1256
1257
1258
1259 if (rq->curr != rq->idle)
1260 return;
1261
1262
1263
1264
1265
1266
1267 set_tsk_need_resched(rq->idle);
1268
1269
1270 smp_mb();
1271 if (!tsk_is_polling(rq->idle))
1272 smp_send_reschedule(cpu);
1273}
1274
1275#endif
1276
1277static u64 sched_avg_period(void)
1278{
1279 return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
1280}
1281
1282static void sched_avg_update(struct rq *rq)
1283{
1284 s64 period = sched_avg_period();
1285
1286 while ((s64)(rq->clock - rq->age_stamp) > period) {
1287
1288
1289
1290
1291
1292 asm("" : "+rm" (rq->age_stamp));
1293 rq->age_stamp += period;
1294 rq->rt_avg /= 2;
1295 }
1296}
1297
1298static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1299{
1300 rq->rt_avg += rt_delta;
1301 sched_avg_update(rq);
1302}
1303
1304#else
1305static void resched_task(struct task_struct *p)
1306{
1307 assert_raw_spin_locked(&task_rq(p)->lock);
1308 set_tsk_need_resched(p);
1309}
1310
1311static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1312{
1313}
1314
1315static void sched_avg_update(struct rq *rq)
1316{
1317}
1318#endif
1319
/*
 * Fixed-point constant used for inverse weights: 2^32 where unsigned
 * long can hold it, the largest unsigned long on 32-bit builds.
 */
#if BITS_PER_LONG == 32
# define WMULT_CONST	(~0UL)
#else
# define WMULT_CONST	(1UL << 32)
#endif

#define WMULT_SHIFT	32

/* Shift right by y and round: half of the divisor is added first. */
#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
1332
1333
1334
1335
1336static unsigned long
1337calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1338 struct load_weight *lw)
1339{
1340 u64 tmp;
1341
1342
1343
1344
1345
1346
1347 if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
1348 tmp = (u64)delta_exec * scale_load_down(weight);
1349 else
1350 tmp = (u64)delta_exec;
1351
1352 if (!lw->inv_weight) {
1353 unsigned long w = scale_load_down(lw->weight);
1354
1355 if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
1356 lw->inv_weight = 1;
1357 else if (unlikely(!w))
1358 lw->inv_weight = WMULT_CONST;
1359 else
1360 lw->inv_weight = WMULT_CONST / w;
1361 }
1362
1363
1364
1365
1366 if (unlikely(tmp > WMULT_CONST))
1367 tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
1368 WMULT_SHIFT/2);
1369 else
1370 tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
1371
1372 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
1373}
1374
1375static inline void update_load_add(struct load_weight *lw, unsigned long inc)
1376{
1377 lw->weight += inc;
1378 lw->inv_weight = 0;
1379}
1380
1381static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
1382{
1383 lw->weight -= dec;
1384 lw->inv_weight = 0;
1385}
1386
1387static inline void update_load_set(struct load_weight *lw, unsigned long w)
1388{
1389 lw->weight = w;
1390 lw->inv_weight = 0;
1391}
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
/*
 * Weight and inverse weight of SCHED_IDLE tasks (see
 * set_load_weight()).  1431655765 is 2^32 / 3, rounded down.
 */
#define WEIGHT_IDLEPRIO                3
#define WMULT_IDLEPRIO         1431655765
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
/*
 * Load weight per nice level, indexed by (nice + 20): slot 0 is
 * nice -20, slot 20 (nice 0) is 1024, slot 39 is nice 19.  Each step
 * changes the weight by roughly 25%.
 */
static const int prio_to_weight[40] = {
 /* -20 */     88761,     71755,     56483,     46273,     36291,
 /* -15 */     29154,     23254,     18705,     14949,     11916,
 /* -10 */      9548,      7620,      6100,      4904,      3906,
 /*  -5 */      3121,      2501,      1991,      1586,      1277,
 /*   0 */      1024,       820,       655,       526,       423,
 /*   5 */       335,       272,       215,       172,       137,
 /*  10 */       110,        87,        70,        56,        45,
 /*  15 */        36,        29,        23,        18,        15,
};
1427
1428
1429
1430
1431
1432
1433
1434
1435static const u32 prio_to_wmult[40] = {
1436 48388, 59856, 76040, 92818, 118348,
1437 147320, 184698, 229616, 287308, 360437,
1438 449829, 563644, 704093, 875809, 1099582,
1439 1376151, 1717300, 2157191, 2708050, 3363326,
1440 4194304, 5237765, 6557202, 8165337, 10153587,
1441 12820798, 15790321, 19976592, 24970740, 31350126,
1442 39045157, 49367440, 61356676, 76695844, 95443717,
1443 119304647, 148102320, 186737708, 238609294, 286331153,
1444};
1445
1446
/* Indices of the per-group CPU accounting statistics. */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* user-mode time */
	CPUACCT_STAT_SYSTEM,	/* system-mode time */

	CPUACCT_STAT_NSTATS,	/* number of statistics; keep last */
};
1453
1454#ifdef CONFIG_CGROUP_CPUACCT
1455static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
1456static void cpuacct_update_stats(struct task_struct *tsk,
1457 enum cpuacct_stat_index idx, cputime_t val);
1458#else
1459static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
1460static inline void cpuacct_update_stats(struct task_struct *tsk,
1461 enum cpuacct_stat_index idx, cputime_t val) {}
1462#endif
1463
1464static inline void inc_cpu_load(struct rq *rq, unsigned long load)
1465{
1466 update_load_add(&rq->load, load);
1467}
1468
1469static inline void dec_cpu_load(struct rq *rq, unsigned long load)
1470{
1471 update_load_sub(&rq->load, load);
1472}
1473
1474#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
/*
 * Callback type for walk_tg_tree(): receives the group being visited
 * and the caller's data pointer; a non-zero return aborts the walk.
 */
typedef int (*tg_visitor)(struct task_group *, void *);
1476
1477
1478
1479
1480
1481static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
1482{
1483 struct task_group *parent, *child;
1484 int ret;
1485
1486 rcu_read_lock();
1487 parent = &root_task_group;
1488down:
1489 ret = (*down)(parent, data);
1490 if (ret)
1491 goto out_unlock;
1492 list_for_each_entry_rcu(child, &parent->children, siblings) {
1493 parent = child;
1494 goto down;
1495
1496up:
1497 continue;
1498 }
1499 ret = (*up)(parent, data);
1500 if (ret)
1501 goto out_unlock;
1502
1503 child = parent;
1504 parent = parent->parent;
1505 if (parent)
1506 goto up;
1507out_unlock:
1508 rcu_read_unlock();
1509
1510 return ret;
1511}
1512
/*
 * tg_nop - do-nothing visitor for walk_tg_tree(); always returns 0 so
 * the walk continues.
 */
static int tg_nop(struct task_group *tg, void *data)
{
	(void)tg;
	(void)data;

	return 0;
}
1517#endif
1518
1519#ifdef CONFIG_SMP
1520
1521static unsigned long weighted_cpuload(const int cpu)
1522{
1523 return cpu_rq(cpu)->load.weight;
1524}
1525
1526
1527
1528
1529
1530
1531
1532
1533static unsigned long source_load(int cpu, int type)
1534{
1535 struct rq *rq = cpu_rq(cpu);
1536 unsigned long total = weighted_cpuload(cpu);
1537
1538 if (type == 0 || !sched_feat(LB_BIAS))
1539 return total;
1540
1541 return min(rq->cpu_load[type-1], total);
1542}
1543
1544
1545
1546
1547
1548static unsigned long target_load(int cpu, int type)
1549{
1550 struct rq *rq = cpu_rq(cpu);
1551 unsigned long total = weighted_cpuload(cpu);
1552
1553 if (type == 0 || !sched_feat(LB_BIAS))
1554 return total;
1555
1556 return max(rq->cpu_load[type-1], total);
1557}
1558
1559static unsigned long power_of(int cpu)
1560{
1561 return cpu_rq(cpu)->cpu_power;
1562}
1563
1564static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
1565
1566static unsigned long cpu_avg_load_per_task(int cpu)
1567{
1568 struct rq *rq = cpu_rq(cpu);
1569 unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
1570
1571 if (nr_running)
1572 rq->avg_load_per_task = rq->load.weight / nr_running;
1573 else
1574 rq->avg_load_per_task = 0;
1575
1576 return rq->avg_load_per_task;
1577}
1578
1579#ifdef CONFIG_PREEMPT
1580
1581static void double_rq_lock(struct rq *rq1, struct rq *rq2);
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1592 __releases(this_rq->lock)
1593 __acquires(busiest->lock)
1594 __acquires(this_rq->lock)
1595{
1596 raw_spin_unlock(&this_rq->lock);
1597 double_rq_lock(this_rq, busiest);
1598
1599 return 1;
1600}
1601
1602#else
1603
1604
1605
1606
1607
1608
1609
1610static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1611 __releases(this_rq->lock)
1612 __acquires(busiest->lock)
1613 __acquires(this_rq->lock)
1614{
1615 int ret = 0;
1616
1617 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
1618 if (busiest < this_rq) {
1619 raw_spin_unlock(&this_rq->lock);
1620 raw_spin_lock(&busiest->lock);
1621 raw_spin_lock_nested(&this_rq->lock,
1622 SINGLE_DEPTH_NESTING);
1623 ret = 1;
1624 } else
1625 raw_spin_lock_nested(&busiest->lock,
1626 SINGLE_DEPTH_NESTING);
1627 }
1628 return ret;
1629}
1630
1631#endif
1632
1633
1634
1635
1636static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1637{
1638 if (unlikely(!irqs_disabled())) {
1639
1640 raw_spin_unlock(&this_rq->lock);
1641 BUG_ON(1);
1642 }
1643
1644 return _double_lock_balance(this_rq, busiest);
1645}
1646
1647static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
1648 __releases(busiest->lock)
1649{
1650 raw_spin_unlock(&busiest->lock);
1651 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
1652}
1653
1654
1655
1656
1657
1658
1659
1660static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1661 __acquires(rq1->lock)
1662 __acquires(rq2->lock)
1663{
1664 BUG_ON(!irqs_disabled());
1665 if (rq1 == rq2) {
1666 raw_spin_lock(&rq1->lock);
1667 __acquire(rq2->lock);
1668 } else {
1669 if (rq1 < rq2) {
1670 raw_spin_lock(&rq1->lock);
1671 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
1672 } else {
1673 raw_spin_lock(&rq2->lock);
1674 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
1675 }
1676 }
1677}
1678
1679
1680
1681
1682
1683
1684
1685static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1686 __releases(rq1->lock)
1687 __releases(rq2->lock)
1688{
1689 raw_spin_unlock(&rq1->lock);
1690 if (rq1 != rq2)
1691 raw_spin_unlock(&rq2->lock);
1692 else
1693 __release(rq2->lock);
1694}
1695
1696#else
1697
1698
1699
1700
1701
1702
1703
1704static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1705 __acquires(rq1->lock)
1706 __acquires(rq2->lock)
1707{
1708 BUG_ON(!irqs_disabled());
1709 BUG_ON(rq1 != rq2);
1710 raw_spin_lock(&rq1->lock);
1711 __acquire(rq2->lock);
1712}
1713
1714
1715
1716
1717
1718
1719
1720static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1721 __releases(rq1->lock)
1722 __releases(rq2->lock)
1723{
1724 BUG_ON(rq1 != rq2);
1725 raw_spin_unlock(&rq1->lock);
1726 __release(rq2->lock);
1727}
1728
1729#endif
1730
1731static void calc_load_account_idle(struct rq *this_rq);
1732static void update_sysctl(void);
1733static int get_update_sysctl_factor(void);
1734static void update_cpu_load(struct rq *this_rq);
1735
/*
 * Bind task @p to @cpu: update its run-queue association and, on SMP,
 * publish the new CPU number in its thread_info.
 */
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
	set_task_rq(p, cpu);

#ifdef CONFIG_SMP
	/*
	 * Order the set_task_rq() stores before the store to ->cpu, so
	 * an observer of the new ->cpu also sees the updated association.
	 */
	smp_wmb();
	task_thread_info(p)->cpu = cpu;
#endif
}
1749
1750static const struct sched_class rt_sched_class;
1751
/* First scheduling class visited by for_each_class(). */
#define sched_class_highest (&stop_sched_class)
/*
 * Walk the scheduling classes starting at sched_class_highest and
 * following ->next until it is NULL.
 */
#define for_each_class(class) \
	for (class = sched_class_highest; class; class = class->next)
1755
1756#include "sched_stats.h"
1757
1758static void inc_nr_running(struct rq *rq)
1759{
1760 rq->nr_running++;
1761}
1762
1763static void dec_nr_running(struct rq *rq)
1764{
1765 rq->nr_running--;
1766}
1767
1768static void set_load_weight(struct task_struct *p)
1769{
1770 int prio = p->static_prio - MAX_RT_PRIO;
1771 struct load_weight *load = &p->se.load;
1772
1773
1774
1775
1776 if (p->policy == SCHED_IDLE) {
1777 load->weight = scale_load(WEIGHT_IDLEPRIO);
1778 load->inv_weight = WMULT_IDLEPRIO;
1779 return;
1780 }
1781
1782 load->weight = scale_load(prio_to_weight[prio]);
1783 load->inv_weight = prio_to_wmult[prio];
1784}
1785
1786static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
1787{
1788 update_rq_clock(rq);
1789 sched_info_queued(p);
1790 p->sched_class->enqueue_task(rq, p, flags);
1791}
1792
1793static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
1794{
1795 update_rq_clock(rq);
1796 sched_info_dequeued(p);
1797 p->sched_class->dequeue_task(rq, p, flags);
1798}
1799
1800
1801
1802
1803static void activate_task(struct rq *rq, struct task_struct *p, int flags)
1804{
1805 if (task_contributes_to_load(p))
1806 rq->nr_uninterruptible--;
1807
1808 enqueue_task(rq, p, flags);
1809 inc_nr_running(rq);
1810}
1811
1812
1813
1814
1815static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
1816{
1817 if (task_contributes_to_load(p))
1818 rq->nr_uninterruptible++;
1819
1820 dequeue_task(rq, p, flags);
1821 dec_nr_running(rq);
1822}
1823
1824#ifdef CONFIG_IRQ_TIME_ACCOUNTING
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
/* Per-CPU accumulated hardirq and softirq time (see account_system_vtime()). */
static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);

/* Per-CPU sched_clock stamp of the last account_system_vtime() call. */
static DEFINE_PER_CPU(u64, irq_start_time);
/* Non-zero while irq time accounting is switched on. */
static int sched_clock_irqtime;
1842
1843void enable_sched_clock_irqtime(void)
1844{
1845 sched_clock_irqtime = 1;
1846}
1847
1848void disable_sched_clock_irqtime(void)
1849{
1850 sched_clock_irqtime = 0;
1851}
1852
1853#ifndef CONFIG_64BIT
1854static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
1855
1856static inline void irq_time_write_begin(void)
1857{
1858 __this_cpu_inc(irq_time_seq.sequence);
1859 smp_wmb();
1860}
1861
1862static inline void irq_time_write_end(void)
1863{
1864 smp_wmb();
1865 __this_cpu_inc(irq_time_seq.sequence);
1866}
1867
1868static inline u64 irq_time_read(int cpu)
1869{
1870 u64 irq_time;
1871 unsigned seq;
1872
1873 do {
1874 seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
1875 irq_time = per_cpu(cpu_softirq_time, cpu) +
1876 per_cpu(cpu_hardirq_time, cpu);
1877 } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
1878
1879 return irq_time;
1880}
1881#else
/* CONFIG_64BIT variant: no sequence counter is kept, nothing to do. */
static inline void irq_time_write_begin(void)
{
	/* intentionally empty */
}
1885
/* CONFIG_64BIT variant: no sequence counter is kept, nothing to do. */
static inline void irq_time_write_end(void)
{
	/* intentionally empty */
}
1889
1890static inline u64 irq_time_read(int cpu)
1891{
1892 return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
1893}
1894#endif
1895
1896
1897
1898
1899
1900void account_system_vtime(struct task_struct *curr)
1901{
1902 unsigned long flags;
1903 s64 delta;
1904 int cpu;
1905
1906 if (!sched_clock_irqtime)
1907 return;
1908
1909 local_irq_save(flags);
1910
1911 cpu = smp_processor_id();
1912 delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
1913 __this_cpu_add(irq_start_time, delta);
1914
1915 irq_time_write_begin();
1916
1917
1918
1919
1920
1921
1922 if (hardirq_count())
1923 __this_cpu_add(cpu_hardirq_time, delta);
1924 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
1925 __this_cpu_add(cpu_softirq_time, delta);
1926
1927 irq_time_write_end();
1928 local_irq_restore(flags);
1929}
1930EXPORT_SYMBOL_GPL(account_system_vtime);
1931
1932#endif
1933
1934#ifdef CONFIG_PARAVIRT
1935static inline u64 steal_ticks(u64 steal)
1936{
1937 if (unlikely(steal > NSEC_PER_SEC))
1938 return div_u64(steal, TICK_NSEC);
1939
1940 return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
1941}
1942#endif
1943
1944static void update_rq_clock_task(struct rq *rq, s64 delta)
1945{
1946
1947
1948
1949
1950#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
1951 s64 steal = 0, irq_delta = 0;
1952#endif
1953#ifdef CONFIG_IRQ_TIME_ACCOUNTING
1954 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971 if (irq_delta > delta)
1972 irq_delta = delta;
1973
1974 rq->prev_irq_time += irq_delta;
1975 delta -= irq_delta;
1976#endif
1977#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
1978 if (static_branch((¶virt_steal_rq_enabled))) {
1979 u64 st;
1980
1981 steal = paravirt_steal_clock(cpu_of(rq));
1982 steal -= rq->prev_steal_time_rq;
1983
1984 if (unlikely(steal > delta))
1985 steal = delta;
1986
1987 st = steal_ticks(steal);
1988 steal = st * TICK_NSEC;
1989
1990 rq->prev_steal_time_rq += steal;
1991
1992 delta -= steal;
1993 }
1994#endif
1995
1996 rq->clock_task += delta;
1997
1998#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
1999 if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
2000 sched_rt_avg_update(rq, irq_delta + steal);
2001#endif
2002}
2003
2004#ifdef CONFIG_IRQ_TIME_ACCOUNTING
2005static int irqtime_account_hi_update(void)
2006{
2007 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2008 unsigned long flags;
2009 u64 latest_ns;
2010 int ret = 0;
2011
2012 local_irq_save(flags);
2013 latest_ns = this_cpu_read(cpu_hardirq_time);
2014 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
2015 ret = 1;
2016 local_irq_restore(flags);
2017 return ret;
2018}
2019
2020static int irqtime_account_si_update(void)
2021{
2022 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2023 unsigned long flags;
2024 u64 latest_ns;
2025 int ret = 0;
2026
2027 local_irq_save(flags);
2028 latest_ns = this_cpu_read(cpu_softirq_time);
2029 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
2030 ret = 1;
2031 local_irq_restore(flags);
2032 return ret;
2033}
2034
2035#else
2036
2037#define sched_clock_irqtime (0)
2038
2039#endif
2040
2041#include "sched_idletask.c"
2042#include "sched_fair.c"
2043#include "sched_rt.c"
2044#include "sched_autogroup.c"
2045#include "sched_stoptask.c"
2046#ifdef CONFIG_SCHED_DEBUG
2047# include "sched_debug.c"
2048#endif
2049
2050void sched_set_stop_task(int cpu, struct task_struct *stop)
2051{
2052 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
2053 struct task_struct *old_stop = cpu_rq(cpu)->stop;
2054
2055 if (stop) {
2056
2057
2058
2059
2060
2061
2062
2063
2064 sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m);
2065
2066 stop->sched_class = &stop_sched_class;
2067 }
2068
2069 cpu_rq(cpu)->stop = stop;
2070
2071 if (old_stop) {
2072
2073
2074
2075
2076 old_stop->sched_class = &rt_sched_class;
2077 }
2078}
2079
2080
2081
2082
2083static inline int __normal_prio(struct task_struct *p)
2084{
2085 return p->static_prio;
2086}
2087
2088
2089
2090
2091
2092
2093
2094
2095static inline int normal_prio(struct task_struct *p)
2096{
2097 int prio;
2098
2099 if (task_has_rt_policy(p))
2100 prio = MAX_RT_PRIO-1 - p->rt_priority;
2101 else
2102 prio = __normal_prio(p);
2103 return prio;
2104}
2105
2106
2107
2108
2109
2110
2111
2112
2113static int effective_prio(struct task_struct *p)
2114{
2115 p->normal_prio = normal_prio(p);
2116
2117
2118
2119
2120
2121 if (!rt_prio(p->prio))
2122 return p->normal_prio;
2123 return p->prio;
2124}
2125
2126
2127
2128
2129
/* Return non-zero if @p is the task currently running on its CPU. */
inline int task_curr(const struct task_struct *p)
{
	int cpu = task_cpu(p);

	return cpu_curr(cpu) == p;
}
2134
2135static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2136 const struct sched_class *prev_class,
2137 int oldprio)
2138{
2139 if (prev_class != p->sched_class) {
2140 if (prev_class->switched_from)
2141 prev_class->switched_from(rq, p);
2142 p->sched_class->switched_to(rq, p);
2143 } else if (oldprio != p->prio)
2144 p->sched_class->prio_changed(rq, p, oldprio);
2145}
2146
2147static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2148{
2149 const struct sched_class *class;
2150
2151 if (p->sched_class == rq->curr->sched_class) {
2152 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
2153 } else {
2154 for_each_class(class) {
2155 if (class == rq->curr->sched_class)
2156 break;
2157 if (class == p->sched_class) {
2158 resched_task(rq->curr);
2159 break;
2160 }
2161 }
2162 }
2163
2164
2165
2166
2167
2168 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
2169 rq->skip_clock_update = 1;
2170}
2171
2172#ifdef CONFIG_SMP
2173
2174
2175
2176static int
2177task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2178{
2179 s64 delta;
2180
2181 if (p->sched_class != &fair_sched_class)
2182 return 0;
2183
2184 if (unlikely(p->policy == SCHED_IDLE))
2185 return 0;
2186
2187
2188
2189
2190 if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
2191 (&p->se == cfs_rq_of(&p->se)->next ||
2192 &p->se == cfs_rq_of(&p->se)->last))
2193 return 1;
2194
2195 if (sysctl_sched_migration_cost == -1)
2196 return 1;
2197 if (sysctl_sched_migration_cost == 0)
2198 return 0;
2199
2200 delta = now - p->se.exec_start;
2201
2202 return delta < (s64)sysctl_sched_migration_cost;
2203}
2204
2205void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2206{
2207#ifdef CONFIG_SCHED_DEBUG
2208
2209
2210
2211
2212 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
2213 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
2214
2215#ifdef CONFIG_LOCKDEP
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226 WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
2227 lockdep_is_held(&task_rq(p)->lock)));
2228#endif
2229#endif
2230
2231 trace_sched_migrate_task(p, new_cpu);
2232
2233 if (task_cpu(p) != new_cpu) {
2234 p->se.nr_migrations++;
2235 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
2236 }
2237
2238 __set_task_cpu(p, new_cpu);
2239}
2240
/* A task together with the CPU it is to be moved to. */
struct migration_arg {
	struct task_struct *task;	/* task to migrate */
	int dest_cpu;			/* destination CPU */
};
2245
2246static int migration_cpu_stop(void *data);
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264unsigned long wait_task_inactive(struct task_struct *p, long match_state)
2265{
2266 unsigned long flags;
2267 int running, on_rq;
2268 unsigned long ncsw;
2269 struct rq *rq;
2270
2271 for (;;) {
2272
2273
2274
2275
2276
2277
2278 rq = task_rq(p);
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291 while (task_running(rq, p)) {
2292 if (match_state && unlikely(p->state != match_state))
2293 return 0;
2294 cpu_relax();
2295 }
2296
2297
2298
2299
2300
2301
2302 rq = task_rq_lock(p, &flags);
2303 trace_sched_wait_task(p);
2304 running = task_running(rq, p);
2305 on_rq = p->on_rq;
2306 ncsw = 0;
2307 if (!match_state || p->state == match_state)
2308 ncsw = p->nvcsw | LONG_MIN;
2309 task_rq_unlock(rq, p, &flags);
2310
2311
2312
2313
2314 if (unlikely(!ncsw))
2315 break;
2316
2317
2318
2319
2320
2321
2322
2323 if (unlikely(running)) {
2324 cpu_relax();
2325 continue;
2326 }
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337 if (unlikely(on_rq)) {
2338 ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
2339
2340 set_current_state(TASK_UNINTERRUPTIBLE);
2341 schedule_hrtimeout(&to, HRTIMER_MODE_REL);
2342 continue;
2343 }
2344
2345
2346
2347
2348
2349
2350 break;
2351 }
2352
2353 return ncsw;
2354}
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
/*
 * Send a reschedule IPI to the CPU of @p if @p is currently running on
 * a CPU other than the local one.
 */
void kick_process(struct task_struct *p)
{
	int target;

	preempt_disable();

	target = task_cpu(p);
	if ((target != smp_processor_id()) && task_curr(p)) {
		smp_send_reschedule(target);
	}

	preempt_enable();
}
2379EXPORT_SYMBOL_GPL(kick_process);
2380#endif
2381
2382#ifdef CONFIG_SMP
2383
2384
2385
2386static int select_fallback_rq(int cpu, struct task_struct *p)
2387{
2388 int dest_cpu;
2389 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
2390
2391
2392 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
2393 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
2394 return dest_cpu;
2395
2396
2397 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
2398 if (dest_cpu < nr_cpu_ids)
2399 return dest_cpu;
2400
2401
2402 dest_cpu = cpuset_cpus_allowed_fallback(p);
2403
2404
2405
2406
2407
2408 if (p->mm && printk_ratelimit()) {
2409 printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
2410 task_pid_nr(p), p->comm, cpu);
2411 }
2412
2413 return dest_cpu;
2414}
2415
2416
2417
2418
2419static inline
2420int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2421{
2422 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434 if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
2435 !cpu_online(cpu)))
2436 cpu = select_fallback_rq(task_cpu(p), p);
2437
2438 return cpu;
2439}
2440
2441static void update_avg(u64 *avg, u64 sample)
2442{
2443 s64 diff = sample - *avg;
2444 *avg += diff >> 3;
2445}
2446#endif
2447
/*
 * Update the wakeup statistics for waking @p on @cpu.  Compiles to an
 * empty function without CONFIG_SCHEDSTATS.
 */
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
{
#ifdef CONFIG_SCHEDSTATS
	struct rq *rq = this_rq();

#ifdef CONFIG_SMP
	int this_cpu = smp_processor_id();

	if (cpu != this_cpu) {
		struct sched_domain *sd;

		schedstat_inc(p, se.statistics.nr_wakeups_remote);

		/* Charge the first domain of this CPU that spans @cpu. */
		rcu_read_lock();
		for_each_domain(this_cpu, sd) {
			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
				schedstat_inc(sd, ttwu_wake_remote);
				break;
			}
		}
		rcu_read_unlock();
	} else {
		schedstat_inc(rq, ttwu_local);
		schedstat_inc(p, se.statistics.nr_wakeups_local);
	}

	if (wake_flags & WF_MIGRATED) {
		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
	}

#endif /* CONFIG_SMP */

	schedstat_inc(rq, ttwu_count);
	schedstat_inc(p, se.statistics.nr_wakeups);

	if (wake_flags & WF_SYNC) {
		schedstat_inc(p, se.statistics.nr_wakeups_sync);
	}

#endif /* CONFIG_SCHEDSTATS */
}
2487
2488static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
2489{
2490 activate_task(rq, p, en_flags);
2491 p->on_rq = 1;
2492
2493
2494 if (p->flags & PF_WQ_WORKER)
2495 wq_worker_waking_up(p, cpu_of(rq));
2496}
2497
2498
2499
2500
2501static void
2502ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
2503{
2504 trace_sched_wakeup(p, true);
2505 check_preempt_curr(rq, p, wake_flags);
2506
2507 p->state = TASK_RUNNING;
2508#ifdef CONFIG_SMP
2509 if (p->sched_class->task_woken)
2510 p->sched_class->task_woken(rq, p);
2511
2512 if (rq->idle_stamp) {
2513 u64 delta = rq->clock - rq->idle_stamp;
2514 u64 max = 2*sysctl_sched_migration_cost;
2515
2516 if (delta > max)
2517 rq->avg_idle = max;
2518 else
2519 update_avg(&rq->avg_idle, delta);
2520 rq->idle_stamp = 0;
2521 }
2522#endif
2523}
2524
2525static void
2526ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
2527{
2528#ifdef CONFIG_SMP
2529 if (p->sched_contributes_to_load)
2530 rq->nr_uninterruptible--;
2531#endif
2532
2533 ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
2534 ttwu_do_wakeup(rq, p, wake_flags);
2535}
2536
2537
2538
2539
2540
2541
2542
2543static int ttwu_remote(struct task_struct *p, int wake_flags)
2544{
2545 struct rq *rq;
2546 int ret = 0;
2547
2548 rq = __task_rq_lock(p);
2549 if (p->on_rq) {
2550 ttwu_do_wakeup(rq, p, wake_flags);
2551 ret = 1;
2552 }
2553 __task_rq_unlock(rq);
2554
2555 return ret;
2556}
2557
2558#ifdef CONFIG_SMP
2559static void sched_ttwu_do_pending(struct task_struct *list)
2560{
2561 struct rq *rq = this_rq();
2562
2563 raw_spin_lock(&rq->lock);
2564
2565 while (list) {
2566 struct task_struct *p = list;
2567 list = list->wake_entry;
2568 ttwu_do_activate(rq, p, 0);
2569 }
2570
2571 raw_spin_unlock(&rq->lock);
2572}
2573
2574#ifdef CONFIG_HOTPLUG_CPU
2575
2576static void sched_ttwu_pending(void)
2577{
2578 struct rq *rq = this_rq();
2579 struct task_struct *list = xchg(&rq->wake_list, NULL);
2580
2581 if (!list)
2582 return;
2583
2584 sched_ttwu_do_pending(list);
2585}
2586
2587#endif
2588
2589void scheduler_ipi(void)
2590{
2591 struct rq *rq = this_rq();
2592 struct task_struct *list = xchg(&rq->wake_list, NULL);
2593
2594 if (!list)
2595 return;
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610 irq_enter();
2611 sched_ttwu_do_pending(list);
2612 irq_exit();
2613}
2614
2615static void ttwu_queue_remote(struct task_struct *p, int cpu)
2616{
2617 struct rq *rq = cpu_rq(cpu);
2618 struct task_struct *next = rq->wake_list;
2619
2620 for (;;) {
2621 struct task_struct *old = next;
2622
2623 p->wake_entry = next;
2624 next = cmpxchg(&rq->wake_list, old, p);
2625 if (next == old)
2626 break;
2627 }
2628
2629 if (!next)
2630 smp_send_reschedule(cpu);
2631}
2632
2633#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2634static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
2635{
2636 struct rq *rq;
2637 int ret = 0;
2638
2639 rq = __task_rq_lock(p);
2640 if (p->on_cpu) {
2641 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
2642 ttwu_do_wakeup(rq, p, wake_flags);
2643 ret = 1;
2644 }
2645 __task_rq_unlock(rq);
2646
2647 return ret;
2648
2649}
2650#endif
2651#endif
2652
2653static void ttwu_queue(struct task_struct *p, int cpu)
2654{
2655 struct rq *rq = cpu_rq(cpu);
2656
2657#if defined(CONFIG_SMP)
2658 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
2659 sched_clock_cpu(cpu);
2660 ttwu_queue_remote(p, cpu);
2661 return;
2662 }
2663#endif
2664
2665 raw_spin_lock(&rq->lock);
2666 ttwu_do_activate(rq, p, 0);
2667 raw_spin_unlock(&rq->lock);
2668}
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685static int
2686try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2687{
2688 unsigned long flags;
2689 int cpu, success = 0;
2690
2691 smp_wmb();
2692 raw_spin_lock_irqsave(&p->pi_lock, flags);
2693 if (!(p->state & state))
2694 goto out;
2695
2696 success = 1;
2697 cpu = task_cpu(p);
2698
2699 if (p->on_rq && ttwu_remote(p, wake_flags))
2700 goto stat;
2701
2702#ifdef CONFIG_SMP
2703
2704
2705
2706
2707 while (p->on_cpu) {
2708#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2709
2710
2711
2712
2713
2714
2715
2716 if (ttwu_activate_remote(p, wake_flags))
2717 goto stat;
2718#else
2719 cpu_relax();
2720#endif
2721 }
2722
2723
2724
2725 smp_rmb();
2726
2727 p->sched_contributes_to_load = !!task_contributes_to_load(p);
2728 p->state = TASK_WAKING;
2729
2730 if (p->sched_class->task_waking)
2731 p->sched_class->task_waking(p);
2732
2733 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2734 if (task_cpu(p) != cpu) {
2735 wake_flags |= WF_MIGRATED;
2736 set_task_cpu(p, cpu);
2737 }
2738#endif
2739
2740 ttwu_queue(p, cpu);
2741stat:
2742 ttwu_stat(p, cpu, wake_flags);
2743out:
2744 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2745
2746 return success;
2747}
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757static void try_to_wake_up_local(struct task_struct *p)
2758{
2759 struct rq *rq = task_rq(p);
2760
2761 BUG_ON(rq != this_rq());
2762 BUG_ON(p == current);
2763 lockdep_assert_held(&rq->lock);
2764
2765 if (!raw_spin_trylock(&p->pi_lock)) {
2766 raw_spin_unlock(&rq->lock);
2767 raw_spin_lock(&p->pi_lock);
2768 raw_spin_lock(&rq->lock);
2769 }
2770
2771 if (!(p->state & TASK_NORMAL))
2772 goto out;
2773
2774 if (!p->on_rq)
2775 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
2776
2777 ttwu_do_wakeup(rq, p, 0);
2778 ttwu_stat(p, smp_processor_id(), 0);
2779out:
2780 raw_spin_unlock(&p->pi_lock);
2781}
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794int wake_up_process(struct task_struct *p)
2795{
2796 return try_to_wake_up(p, TASK_ALL, 0);
2797}
2798EXPORT_SYMBOL(wake_up_process);
2799
/*
 * Wake up @p if its state matches the @state mask.  Returns the result
 * of try_to_wake_up().
 */
int wake_up_state(struct task_struct *p, unsigned int state)
{
	int woken = try_to_wake_up(p, state, 0);

	return woken;
}
2804
2805
2806
2807
2808
2809
2810
2811static void __sched_fork(struct task_struct *p)
2812{
2813 p->on_rq = 0;
2814
2815 p->se.on_rq = 0;
2816 p->se.exec_start = 0;
2817 p->se.sum_exec_runtime = 0;
2818 p->se.prev_sum_exec_runtime = 0;
2819 p->se.nr_migrations = 0;
2820 p->se.vruntime = 0;
2821 INIT_LIST_HEAD(&p->se.group_node);
2822
2823#ifdef CONFIG_SCHEDSTATS
2824 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
2825#endif
2826
2827 INIT_LIST_HEAD(&p->rt.run_list);
2828
2829#ifdef CONFIG_PREEMPT_NOTIFIERS
2830 INIT_HLIST_HEAD(&p->preempt_notifiers);
2831#endif
2832}
2833
2834
2835
2836
2837void sched_fork(struct task_struct *p)
2838{
2839 unsigned long flags;
2840 int cpu = get_cpu();
2841
2842 __sched_fork(p);
2843
2844
2845
2846
2847
2848 p->state = TASK_RUNNING;
2849
2850
2851
2852
2853 if (unlikely(p->sched_reset_on_fork)) {
2854 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
2855 p->policy = SCHED_NORMAL;
2856 p->normal_prio = p->static_prio;
2857 }
2858
2859 if (PRIO_TO_NICE(p->static_prio) < 0) {
2860 p->static_prio = NICE_TO_PRIO(0);
2861 p->normal_prio = p->static_prio;
2862 set_load_weight(p);
2863 }
2864
2865
2866
2867
2868
2869 p->sched_reset_on_fork = 0;
2870 }
2871
2872
2873
2874
2875 p->prio = current->normal_prio;
2876
2877 if (!rt_prio(p->prio))
2878 p->sched_class = &fair_sched_class;
2879
2880 if (p->sched_class->task_fork)
2881 p->sched_class->task_fork(p);
2882
2883
2884
2885
2886
2887
2888
2889
2890 raw_spin_lock_irqsave(&p->pi_lock, flags);
2891 set_task_cpu(p, cpu);
2892 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2893
2894#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2895 if (likely(sched_info_on()))
2896 memset(&p->sched_info, 0, sizeof(p->sched_info));
2897#endif
2898#if defined(CONFIG_SMP)
2899 p->on_cpu = 0;
2900#endif
2901#ifdef CONFIG_PREEMPT_COUNT
2902
2903 task_thread_info(p)->preempt_count = 1;
2904#endif
2905#ifdef CONFIG_SMP
2906 plist_node_init(&p->pushable_tasks, MAX_PRIO);
2907#endif
2908
2909 put_cpu();
2910}
2911
2912
2913
2914
2915
2916
2917
2918
2919void wake_up_new_task(struct task_struct *p)
2920{
2921 unsigned long flags;
2922 struct rq *rq;
2923
2924 raw_spin_lock_irqsave(&p->pi_lock, flags);
2925#ifdef CONFIG_SMP
2926
2927
2928
2929
2930
2931 set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
2932#endif
2933
2934 rq = __task_rq_lock(p);
2935 activate_task(rq, p, 0);
2936 p->on_rq = 1;
2937 trace_sched_wakeup_new(p, true);
2938 check_preempt_curr(rq, p, WF_FORK);
2939#ifdef CONFIG_SMP
2940 if (p->sched_class->task_woken)
2941 p->sched_class->task_woken(rq, p);
2942#endif
2943 task_rq_unlock(rq, p, &flags);
2944}
2945
2946#ifdef CONFIG_PREEMPT_NOTIFIERS
2947
2948
2949
2950
2951
2952void preempt_notifier_register(struct preempt_notifier *notifier)
2953{
2954 hlist_add_head(¬ifier->link, ¤t->preempt_notifiers);
2955}
2956EXPORT_SYMBOL_GPL(preempt_notifier_register);
2957
2958
2959
2960
2961
2962
2963
2964void preempt_notifier_unregister(struct preempt_notifier *notifier)
2965{
2966 hlist_del(¬ifier->link);
2967}
2968EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
2969
2970static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
2971{
2972 struct preempt_notifier *notifier;
2973 struct hlist_node *node;
2974
2975 hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
2976 notifier->ops->sched_in(notifier, raw_smp_processor_id());
2977}
2978
2979static void
2980fire_sched_out_preempt_notifiers(struct task_struct *curr,
2981 struct task_struct *next)
2982{
2983 struct preempt_notifier *notifier;
2984 struct hlist_node *node;
2985
2986 hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
2987 notifier->ops->sched_out(notifier, next);
2988}
2989
2990#else
2991
/* Stub for builds without CONFIG_PREEMPT_NOTIFIERS. */
static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
{
	/* intentionally empty */
}
2995
/* Stub for builds without CONFIG_PREEMPT_NOTIFIERS. */
static void
fire_sched_out_preempt_notifiers(struct task_struct *curr,
				 struct task_struct *next)
{
	/* intentionally empty */
}
3001
3002#endif
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
/*
 * Run the hooks that must fire before switching from @prev to @next:
 * sched_info, perf, preempt notifiers, lock and arch preparation, and
 * the sched_switch tracepoint, in that order.
 */
static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
		    struct task_struct *next)
{
	/* Accounting and notification for the outgoing task. */
	sched_info_switch(prev, next);
	perf_event_task_sched_out(prev, next);
	fire_sched_out_preempt_notifiers(prev, next);

	/* Lock and architecture preparation for the incoming task. */
	prepare_lock_switch(rq, next);
	prepare_arch_switch(next);

	trace_sched_switch(prev, next);
}
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044static void finish_task_switch(struct rq *rq, struct task_struct *prev)
3045 __releases(rq->lock)
3046{
3047 struct mm_struct *mm = rq->prev_mm;
3048 long prev_state;
3049
3050 rq->prev_mm = NULL;
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063 prev_state = prev->state;
3064 finish_arch_switch(prev);
3065#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
3066 local_irq_disable();
3067#endif
3068 perf_event_task_sched_in(prev, current);
3069#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
3070 local_irq_enable();
3071#endif
3072 finish_lock_switch(rq, prev);
3073
3074 fire_sched_in_preempt_notifiers(current);
3075 if (mm)
3076 mmdrop(mm);
3077 if (unlikely(prev_state == TASK_DEAD)) {
3078
3079
3080
3081
3082 kprobe_flush_task(prev);
3083 put_task_struct(prev);
3084 }
3085}
3086
3087#ifdef CONFIG_SMP
3088
3089
3090static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
3091{
3092 if (prev->sched_class->pre_schedule)
3093 prev->sched_class->pre_schedule(rq, prev);
3094}
3095
3096
3097static inline void post_schedule(struct rq *rq)
3098{
3099 if (rq->post_schedule) {
3100 unsigned long flags;
3101
3102 raw_spin_lock_irqsave(&rq->lock, flags);
3103 if (rq->curr->sched_class->post_schedule)
3104 rq->curr->sched_class->post_schedule(rq);
3105 raw_spin_unlock_irqrestore(&rq->lock, flags);
3106
3107 rq->post_schedule = 0;
3108 }
3109}
3110
3111#else
3112
/* Stub for !CONFIG_SMP builds. */
static inline void pre_schedule(struct rq *rq, struct task_struct *p)
{
	/* intentionally empty */
}
3116
/* Stub for !CONFIG_SMP builds. */
static inline void post_schedule(struct rq *rq)
{
	/* intentionally empty */
}
3120
3121#endif
3122
3123
3124
3125
3126
3127asmlinkage void schedule_tail(struct task_struct *prev)
3128 __releases(rq->lock)
3129{
3130 struct rq *rq = this_rq();
3131
3132 finish_task_switch(rq, prev);
3133
3134
3135
3136
3137
3138 post_schedule(rq);
3139
3140#ifdef __ARCH_WANT_UNLOCKED_CTXSW
3141
3142 preempt_enable();
3143#endif
3144 if (current->set_child_tid)
3145 put_user(task_pid_vnr(current), current->set_child_tid);
3146}
3147
3148
3149
3150
3151
3152static inline void
3153context_switch(struct rq *rq, struct task_struct *prev,
3154 struct task_struct *next)
3155{
3156 struct mm_struct *mm, *oldmm;
3157
3158 prepare_task_switch(rq, prev, next);
3159
3160 mm = next->mm;
3161 oldmm = prev->active_mm;
3162
3163
3164
3165
3166
3167 arch_start_context_switch(prev);
3168
3169 if (!mm) {
3170 next->active_mm = oldmm;
3171 atomic_inc(&oldmm->mm_count);
3172 enter_lazy_tlb(oldmm, next);
3173 } else
3174 switch_mm(oldmm, mm, next);
3175
3176 if (!prev->mm) {
3177 prev->active_mm = NULL;
3178 rq->prev_mm = oldmm;
3179 }
3180
3181
3182
3183
3184
3185
3186#ifndef __ARCH_WANT_UNLOCKED_CTXSW
3187 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
3188#endif
3189
3190
3191 switch_to(prev, next, prev);
3192
3193 barrier();
3194
3195
3196
3197
3198
3199 finish_task_switch(this_rq(), prev);
3200}
3201
3202
3203
3204
3205
3206
3207
3208
3209unsigned long nr_running(void)
3210{
3211 unsigned long i, sum = 0;
3212
3213 for_each_online_cpu(i)
3214 sum += cpu_rq(i)->nr_running;
3215
3216 return sum;
3217}
3218
3219unsigned long nr_uninterruptible(void)
3220{
3221 unsigned long i, sum = 0;
3222
3223 for_each_possible_cpu(i)
3224 sum += cpu_rq(i)->nr_uninterruptible;
3225
3226
3227
3228
3229
3230 if (unlikely((long)sum < 0))
3231 sum = 0;
3232
3233 return sum;
3234}
3235
3236unsigned long long nr_context_switches(void)
3237{
3238 int i;
3239 unsigned long long sum = 0;
3240
3241 for_each_possible_cpu(i)
3242 sum += cpu_rq(i)->nr_switches;
3243
3244 return sum;
3245}
3246
3247unsigned long nr_iowait(void)
3248{
3249 unsigned long i, sum = 0;
3250
3251 for_each_possible_cpu(i)
3252 sum += atomic_read(&cpu_rq(i)->nr_iowait);
3253
3254 return sum;
3255}
3256
3257unsigned long nr_iowait_cpu(int cpu)
3258{
3259 struct rq *this = cpu_rq(cpu);
3260 return atomic_read(&this->nr_iowait);
3261}
3262
3263unsigned long this_cpu_load(void)
3264{
3265 struct rq *this = this_rq();
3266 return this->cpu_load[0];
3267}
3268
3269
3270
/* Global count of active tasks, fed by the per-rq deltas. */
static atomic_long_t calc_load_tasks;
/* Jiffies value at which the next load-average update is due. */
static unsigned long calc_load_update;
/* The three load averages in fixed-point form, updated via calc_load(). */
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun);
3275
3276static long calc_load_fold_active(struct rq *this_rq)
3277{
3278 long nr_active, delta = 0;
3279
3280 nr_active = this_rq->nr_running;
3281 nr_active += (long) this_rq->nr_uninterruptible;
3282
3283 if (nr_active != this_rq->calc_load_active) {
3284 delta = nr_active - this_rq->calc_load_active;
3285 this_rq->calc_load_active = nr_active;
3286 }
3287
3288 return delta;
3289}
3290
3291static unsigned long
3292calc_load(unsigned long load, unsigned long exp, unsigned long active)
3293{
3294 load *= exp;
3295 load += active * (FIXED_1 - exp);
3296 load += 1UL << (FSHIFT - 1);
3297 return load >> FSHIFT;
3298}
3299
3300#ifdef CONFIG_NO_HZ
3301
3302
3303
3304
3305
3306static atomic_long_t calc_load_tasks_idle;
3307
3308static void calc_load_account_idle(struct rq *this_rq)
3309{
3310 long delta;
3311
3312 delta = calc_load_fold_active(this_rq);
3313 if (delta)
3314 atomic_long_add(delta, &calc_load_tasks_idle);
3315}
3316
3317static long calc_load_fold_idle(void)
3318{
3319 long delta = 0;
3320
3321
3322
3323
3324 if (atomic_long_read(&calc_load_tasks_idle))
3325 delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
3326
3327 return delta;
3328}
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
/*
 * Raise the fixed-point value @x (with @frac_bits fractional bits) to
 * the integer power @n by repeated squaring.  Every multiplication is
 * rounded to nearest.  For @n == 0 the result is 1.0, i.e.
 * 1UL << @frac_bits.
 */
static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	while (n) {
		/* This bit of the exponent is set: multiply it in. */
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);
			result >>= frac_bits;
		}

		n >>= 1;

		/* Square x only when higher exponent bits remain. */
		if (n) {
			x *= x;
			x += 1UL << (frac_bits - 1);
			x >>= frac_bits;
		}
	}

	return result;
}
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390static unsigned long
3391calc_load_n(unsigned long load, unsigned long exp,
3392 unsigned long active, unsigned int n)
3393{
3394
3395 return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
3396}
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407static void calc_global_nohz(unsigned long ticks)
3408{
3409 long delta, active, n;
3410
3411 if (time_before(jiffies, calc_load_update))
3412 return;
3413
3414
3415
3416
3417
3418
3419
3420 delta = calc_load_fold_idle();
3421 if (delta)
3422 atomic_long_add(delta, &calc_load_tasks);
3423
3424
3425
3426
3427 if (ticks >= LOAD_FREQ) {
3428 n = ticks / LOAD_FREQ;
3429
3430 active = atomic_long_read(&calc_load_tasks);
3431 active = active > 0 ? active * FIXED_1 : 0;
3432
3433 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
3434 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
3435 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
3436
3437 calc_load_update += n * LOAD_FREQ;
3438 }
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450}
3451#else
/* !CONFIG_NO_HZ variant: no idle delta is tracked, so this is a no-op. */
static void calc_load_account_idle(struct rq *this_rq)
{
	/* intentionally empty */
}
3455
/* !CONFIG_NO_HZ variant: there is never a pending idle delta. */
static inline long calc_load_fold_idle(void)
{
	const long no_delta = 0;

	return no_delta;
}
3460
/* !CONFIG_NO_HZ variant: no catch-up work is needed, so this is a no-op. */
static void calc_global_nohz(unsigned long ticks)
{
	/* intentionally empty */
}
3464#endif
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
3475{
3476 loads[0] = (avenrun[0] + offset) << shift;
3477 loads[1] = (avenrun[1] + offset) << shift;
3478 loads[2] = (avenrun[2] + offset) << shift;
3479}
3480
3481
3482
3483
3484
3485void calc_global_load(unsigned long ticks)
3486{
3487 long active;
3488
3489 calc_global_nohz(ticks);
3490
3491 if (time_before(jiffies, calc_load_update + 10))
3492 return;
3493
3494 active = atomic_long_read(&calc_load_tasks);
3495 active = active > 0 ? active * FIXED_1 : 0;
3496
3497 avenrun[0] = calc_load(avenrun[0], EXP_1, active);
3498 avenrun[1] = calc_load(avenrun[1], EXP_5, active);
3499 avenrun[2] = calc_load(avenrun[2], EXP_15, active);
3500
3501 calc_load_update += LOAD_FREQ;
3502}
3503
3504
3505
3506
3507
3508static void calc_load_account_active(struct rq *this_rq)
3509{
3510 long delta;
3511
3512 if (time_before(jiffies, this_rq->calc_load_update))
3513 return;
3514
3515 delta = calc_load_fold_active(this_rq);
3516 delta += calc_load_fold_idle();
3517 if (delta)
3518 atomic_long_add(delta, &calc_load_tasks);
3519
3520 this_rq->calc_load_update += LOAD_FREQ;
3521}
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550#define DEGRADE_SHIFT 7
3551static const unsigned char
3552 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
3553static const unsigned char
3554 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
3555 {0, 0, 0, 0, 0, 0, 0, 0},
3556 {64, 32, 8, 0, 0, 0, 0, 0},
3557 {96, 72, 40, 12, 1, 0, 0},
3558 {112, 98, 75, 43, 15, 1, 0},
3559 {120, 112, 98, 76, 45, 16, 2} };
3560
3561
3562
3563
3564
3565
3566static unsigned long
3567decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
3568{
3569 int j = 0;
3570
3571 if (!missed_updates)
3572 return load;
3573
3574 if (missed_updates >= degrade_zero_ticks[idx])
3575 return 0;
3576
3577 if (idx == 1)
3578 return load >> missed_updates;
3579
3580 while (missed_updates) {
3581 if (missed_updates % 2)
3582 load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
3583
3584 missed_updates >>= 1;
3585 j++;
3586 }
3587 return load;
3588}
3589
3590
3591
3592
3593
3594
3595static void update_cpu_load(struct rq *this_rq)
3596{
3597 unsigned long this_load = this_rq->load.weight;
3598 unsigned long curr_jiffies = jiffies;
3599 unsigned long pending_updates;
3600 int i, scale;
3601
3602 this_rq->nr_load_updates++;
3603
3604
3605 if (curr_jiffies == this_rq->last_load_update_tick)
3606 return;
3607
3608 pending_updates = curr_jiffies - this_rq->last_load_update_tick;
3609 this_rq->last_load_update_tick = curr_jiffies;
3610
3611
3612 this_rq->cpu_load[0] = this_load;
3613 for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
3614 unsigned long old_load, new_load;
3615
3616
3617
3618 old_load = this_rq->cpu_load[i];
3619 old_load = decay_load_missed(old_load, pending_updates - 1, i);
3620 new_load = this_load;
3621
3622
3623
3624
3625
3626 if (new_load > old_load)
3627 new_load += scale - 1;
3628
3629 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
3630 }
3631
3632 sched_avg_update(this_rq);
3633}
3634
/*
 * Refresh the cpu_load[] averages for @this_rq, then let it contribute
 * to the global load-average bookkeeping.
 */
static void update_cpu_load_active(struct rq *this_rq)
{
	update_cpu_load(this_rq);
	calc_load_account_active(this_rq);
}
3641
3642#ifdef CONFIG_SMP
3643
3644
3645
3646
3647
3648void sched_exec(void)
3649{
3650 struct task_struct *p = current;
3651 unsigned long flags;
3652 int dest_cpu;
3653
3654 raw_spin_lock_irqsave(&p->pi_lock, flags);
3655 dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
3656 if (dest_cpu == smp_processor_id())
3657 goto unlock;
3658
3659 if (likely(cpu_active(dest_cpu))) {
3660 struct migration_arg arg = { p, dest_cpu };
3661
3662 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3663 stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
3664 return;
3665 }
3666unlock:
3667 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3668}
3669
3670#endif
3671
/*
 * Per-CPU kernel statistics.  The accounting helpers in this file update
 * the cpustat member through kstat_this_cpu (presumably this CPU's
 * instance of kstat -- confirm against the macro's definition).
 */
DEFINE_PER_CPU(struct kernel_stat, kstat);

EXPORT_PER_CPU_SYMBOL(kstat);
3675
3676
3677
3678
3679
3680
3681
3682static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
3683{
3684 u64 ns = 0;
3685
3686 if (task_current(rq, p)) {
3687 update_rq_clock(rq);
3688 ns = rq->clock_task - p->se.exec_start;
3689 if ((s64)ns < 0)
3690 ns = 0;
3691 }
3692
3693 return ns;
3694}
3695
3696unsigned long long task_delta_exec(struct task_struct *p)
3697{
3698 unsigned long flags;
3699 struct rq *rq;
3700 u64 ns = 0;
3701
3702 rq = task_rq_lock(p, &flags);
3703 ns = do_task_delta_exec(p, rq);
3704 task_rq_unlock(rq, p, &flags);
3705
3706 return ns;
3707}
3708
3709
3710
3711
3712
3713
3714unsigned long long task_sched_runtime(struct task_struct *p)
3715{
3716 unsigned long flags;
3717 struct rq *rq;
3718 u64 ns = 0;
3719
3720 rq = task_rq_lock(p, &flags);
3721 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
3722 task_rq_unlock(rq, p, &flags);
3723
3724 return ns;
3725}
3726
3727
3728
3729
3730
3731
3732
3733void account_user_time(struct task_struct *p, cputime_t cputime,
3734 cputime_t cputime_scaled)
3735{
3736 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3737 cputime64_t tmp;
3738
3739
3740 p->utime = cputime_add(p->utime, cputime);
3741 p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
3742 account_group_user_time(p, cputime);
3743
3744
3745 tmp = cputime_to_cputime64(cputime);
3746 if (TASK_NICE(p) > 0)
3747 cpustat->nice = cputime64_add(cpustat->nice, tmp);
3748 else
3749 cpustat->user = cputime64_add(cpustat->user, tmp);
3750
3751 cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
3752
3753 acct_update_integrals(p);
3754}
3755
3756
3757
3758
3759
3760
3761
3762static void account_guest_time(struct task_struct *p, cputime_t cputime,
3763 cputime_t cputime_scaled)
3764{
3765 cputime64_t tmp;
3766 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3767
3768 tmp = cputime_to_cputime64(cputime);
3769
3770
3771 p->utime = cputime_add(p->utime, cputime);
3772 p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
3773 account_group_user_time(p, cputime);
3774 p->gtime = cputime_add(p->gtime, cputime);
3775
3776
3777 if (TASK_NICE(p) > 0) {
3778 cpustat->nice = cputime64_add(cpustat->nice, tmp);
3779 cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
3780 } else {
3781 cpustat->user = cputime64_add(cpustat->user, tmp);
3782 cpustat->guest = cputime64_add(cpustat->guest, tmp);
3783 }
3784}
3785
3786
3787
3788
3789
3790
3791
3792
3793static inline
3794void __account_system_time(struct task_struct *p, cputime_t cputime,
3795 cputime_t cputime_scaled, cputime64_t *target_cputime64)
3796{
3797 cputime64_t tmp = cputime_to_cputime64(cputime);
3798
3799
3800 p->stime = cputime_add(p->stime, cputime);
3801 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3802 account_group_system_time(p, cputime);
3803
3804
3805 *target_cputime64 = cputime64_add(*target_cputime64, tmp);
3806 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
3807
3808
3809 acct_update_integrals(p);
3810}
3811
3812
3813
3814
3815
3816
3817
3818
3819void account_system_time(struct task_struct *p, int hardirq_offset,
3820 cputime_t cputime, cputime_t cputime_scaled)
3821{
3822 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3823 cputime64_t *target_cputime64;
3824
3825 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
3826 account_guest_time(p, cputime, cputime_scaled);
3827 return;
3828 }
3829
3830 if (hardirq_count() - hardirq_offset)
3831 target_cputime64 = &cpustat->irq;
3832 else if (in_serving_softirq())
3833 target_cputime64 = &cpustat->softirq;
3834 else
3835 target_cputime64 = &cpustat->system;
3836
3837 __account_system_time(p, cputime, cputime_scaled, target_cputime64);
3838}
3839
3840
3841
3842
3843
3844void account_steal_time(cputime_t cputime)
3845{
3846 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3847 cputime64_t cputime64 = cputime_to_cputime64(cputime);
3848
3849 cpustat->steal = cputime64_add(cpustat->steal, cputime64);
3850}
3851
3852
3853
3854
3855
3856void account_idle_time(cputime_t cputime)
3857{
3858 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3859 cputime64_t cputime64 = cputime_to_cputime64(cputime);
3860 struct rq *rq = this_rq();
3861
3862 if (atomic_read(&rq->nr_iowait) > 0)
3863 cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
3864 else
3865 cpustat->idle = cputime64_add(cpustat->idle, cputime64);
3866}
3867
3868static __always_inline bool steal_account_process_tick(void)
3869{
3870#ifdef CONFIG_PARAVIRT
3871 if (static_branch(¶virt_steal_enabled)) {
3872 u64 steal, st = 0;
3873
3874 steal = paravirt_steal_clock(smp_processor_id());
3875 steal -= this_rq()->prev_steal_time;
3876
3877 st = steal_ticks(steal);
3878 this_rq()->prev_steal_time += st * TICK_NSEC;
3879
3880 account_steal_time(st);
3881 return st;
3882 }
3883#endif
3884 return false;
3885}
3886
3887#ifndef CONFIG_VIRT_CPU_ACCOUNTING
3888
3889#ifdef CONFIG_IRQ_TIME_ACCOUNTING
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3912 struct rq *rq)
3913{
3914 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3915 cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
3916 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3917
3918 if (steal_account_process_tick())
3919 return;
3920
3921 if (irqtime_account_hi_update()) {
3922 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3923 } else if (irqtime_account_si_update()) {
3924 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3925 } else if (this_cpu_ksoftirqd() == p) {
3926
3927
3928
3929
3930
3931 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3932 &cpustat->softirq);
3933 } else if (user_tick) {
3934 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3935 } else if (p == rq->idle) {
3936 account_idle_time(cputime_one_jiffy);
3937 } else if (p->flags & PF_VCPU) {
3938 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
3939 } else {
3940 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3941 &cpustat->system);
3942 }
3943}
3944
3945static void irqtime_account_idle_ticks(int ticks)
3946{
3947 int i;
3948 struct rq *rq = this_rq();
3949
3950 for (i = 0; i < ticks; i++)
3951 irqtime_account_process_tick(current, 0, rq);
3952}
3953#else
/* !CONFIG_IRQ_TIME_ACCOUNTING variant: nothing to do. */
static void irqtime_account_idle_ticks(int ticks)
{
}
/* !CONFIG_IRQ_TIME_ACCOUNTING variant: nothing to do. */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq)
{
}
3957#endif
3958
3959
3960
3961
3962
3963
3964void account_process_tick(struct task_struct *p, int user_tick)
3965{
3966 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3967 struct rq *rq = this_rq();
3968
3969 if (sched_clock_irqtime) {
3970 irqtime_account_process_tick(p, user_tick, rq);
3971 return;
3972 }
3973
3974 if (steal_account_process_tick())
3975 return;
3976
3977 if (user_tick)
3978 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3979 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
3980 account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
3981 one_jiffy_scaled);
3982 else
3983 account_idle_time(cputime_one_jiffy);
3984}
3985
3986
3987
3988
3989
3990
3991void account_steal_ticks(unsigned long ticks)
3992{
3993 account_steal_time(jiffies_to_cputime(ticks));
3994}
3995
3996
3997
3998
3999
4000void account_idle_ticks(unsigned long ticks)
4001{
4002
4003 if (sched_clock_irqtime) {
4004 irqtime_account_idle_ticks(ticks);
4005 return;
4006 }
4007
4008 account_idle_time(jiffies_to_cputime(ticks));
4009}
4010
4011#endif
4012
4013
4014
4015
4016#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
 * task_times (CONFIG_VIRT_CPU_ACCOUNTING variant) - report the task's
 * utime and stime exactly as stored, with no adjustment.
 * @p:  task to query
 * @ut: receives p->utime
 * @st: receives p->stime
 */
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}
4022
4023void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
4024{
4025 struct task_cputime cputime;
4026
4027 thread_group_cputime(p, &cputime);
4028
4029 *ut = cputime.utime;
4030 *st = cputime.stime;
4031}
4032#else
4033
/*
 * Fallback used when nothing else has defined nsecs_to_cputime():
 * convert nanoseconds with nsecs_to_jiffies().
 */
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif
4037
4038void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
4039{
4040 cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
4041
4042
4043
4044
4045 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
4046
4047 if (total) {
4048 u64 temp = rtime;
4049
4050 temp *= utime;
4051 do_div(temp, total);
4052 utime = (cputime_t)temp;
4053 } else
4054 utime = rtime;
4055
4056
4057
4058
4059 p->prev_utime = max(p->prev_utime, utime);
4060 p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
4061
4062 *ut = p->prev_utime;
4063 *st = p->prev_stime;
4064}
4065
4066
4067
4068
4069void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
4070{
4071 struct signal_struct *sig = p->signal;
4072 struct task_cputime cputime;
4073 cputime_t rtime, utime, total;
4074
4075 thread_group_cputime(p, &cputime);
4076
4077 total = cputime_add(cputime.utime, cputime.stime);
4078 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
4079
4080 if (total) {
4081 u64 temp = rtime;
4082
4083 temp *= cputime.utime;
4084 do_div(temp, total);
4085 utime = (cputime_t)temp;
4086 } else
4087 utime = rtime;
4088
4089 sig->prev_utime = max(sig->prev_utime, utime);
4090 sig->prev_stime = max(sig->prev_stime,
4091 cputime_sub(rtime, sig->prev_utime));
4092
4093 *ut = sig->prev_utime;
4094 *st = sig->prev_stime;
4095}
4096#endif
4097
4098
4099
4100
4101
4102void scheduler_tick(void)
4103{
4104 int cpu = smp_processor_id();
4105 struct rq *rq = cpu_rq(cpu);
4106 struct task_struct *curr = rq->curr;
4107
4108 sched_clock_tick();
4109
4110 raw_spin_lock(&rq->lock);
4111 update_rq_clock(rq);
4112 update_cpu_load_active(rq);
4113 curr->sched_class->task_tick(rq, curr, 0);
4114 raw_spin_unlock(&rq->lock);
4115
4116 perf_event_task_tick();
4117
4118#ifdef CONFIG_SMP
4119 rq->idle_at_tick = idle_cpu(cpu);
4120 trigger_load_balance(rq, cpu);
4121#endif
4122}
4123
4124notrace unsigned long get_parent_ip(unsigned long addr)
4125{
4126 if (in_lock_functions(addr)) {
4127 addr = CALLER_ADDR2;
4128 if (in_lock_functions(addr))
4129 addr = CALLER_ADDR3;
4130 }
4131 return addr;
4132}
4133
4134#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4135 defined(CONFIG_PREEMPT_TRACER))
4136
4137void __kprobes add_preempt_count(int val)
4138{
4139#ifdef CONFIG_DEBUG_PREEMPT
4140
4141
4142
4143 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
4144 return;
4145#endif
4146 preempt_count() += val;
4147#ifdef CONFIG_DEBUG_PREEMPT
4148
4149
4150
4151 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
4152 PREEMPT_MASK - 10);
4153#endif
4154 if (preempt_count() == val)
4155 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4156}
4157EXPORT_SYMBOL(add_preempt_count);
4158
4159void __kprobes sub_preempt_count(int val)
4160{
4161#ifdef CONFIG_DEBUG_PREEMPT
4162
4163
4164
4165 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
4166 return;
4167
4168
4169
4170 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
4171 !(preempt_count() & PREEMPT_MASK)))
4172 return;
4173#endif
4174
4175 if (preempt_count() == val)
4176 trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4177 preempt_count() -= val;
4178}
4179EXPORT_SYMBOL(sub_preempt_count);
4180
4181#endif
4182
4183
4184
4185
4186static noinline void __schedule_bug(struct task_struct *prev)
4187{
4188 struct pt_regs *regs = get_irq_regs();
4189
4190 printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
4191 prev->comm, prev->pid, preempt_count());
4192
4193 debug_show_held_locks(prev);
4194 print_modules();
4195 if (irqs_disabled())
4196 print_irqtrace_events(prev);
4197
4198 if (regs)
4199 show_regs(regs);
4200 else
4201 dump_stack();
4202}
4203
4204
4205
4206
4207static inline void schedule_debug(struct task_struct *prev)
4208{
4209
4210
4211
4212
4213
4214 if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
4215 __schedule_bug(prev);
4216
4217 profile_hit(SCHED_PROFILING, __builtin_return_address(0));
4218
4219 schedstat_inc(this_rq(), sched_count);
4220}
4221
4222static void put_prev_task(struct rq *rq, struct task_struct *prev)
4223{
4224 if (prev->on_rq || rq->skip_clock_update < 0)
4225 update_rq_clock(rq);
4226 prev->sched_class->put_prev_task(rq, prev);
4227}
4228
4229
4230
4231
4232static inline struct task_struct *
4233pick_next_task(struct rq *rq)
4234{
4235 const struct sched_class *class;
4236 struct task_struct *p;
4237
4238
4239
4240
4241
4242 if (likely(rq->nr_running == rq->cfs.nr_running)) {
4243 p = fair_sched_class.pick_next_task(rq);
4244 if (likely(p))
4245 return p;
4246 }
4247
4248 for_each_class(class) {
4249 p = class->pick_next_task(rq);
4250 if (p)
4251 return p;
4252 }
4253
4254 BUG();
4255}
4256
4257
4258
4259
4260static void __sched __schedule(void)
4261{
4262 struct task_struct *prev, *next;
4263 unsigned long *switch_count;
4264 struct rq *rq;
4265 int cpu;
4266
4267need_resched:
4268 preempt_disable();
4269 cpu = smp_processor_id();
4270 rq = cpu_rq(cpu);
4271 rcu_note_context_switch(cpu);
4272 prev = rq->curr;
4273
4274 schedule_debug(prev);
4275
4276 if (sched_feat(HRTICK))
4277 hrtick_clear(rq);
4278
4279 raw_spin_lock_irq(&rq->lock);
4280
4281 switch_count = &prev->nivcsw;
4282 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
4283 if (unlikely(signal_pending_state(prev->state, prev))) {
4284 prev->state = TASK_RUNNING;
4285 } else {
4286 deactivate_task(rq, prev, DEQUEUE_SLEEP);
4287 prev->on_rq = 0;
4288
4289
4290
4291
4292
4293
4294 if (prev->flags & PF_WQ_WORKER) {
4295 struct task_struct *to_wakeup;
4296
4297 to_wakeup = wq_worker_sleeping(prev, cpu);
4298 if (to_wakeup)
4299 try_to_wake_up_local(to_wakeup);
4300 }
4301 }
4302 switch_count = &prev->nvcsw;
4303 }
4304
4305 pre_schedule(rq, prev);
4306
4307 if (unlikely(!rq->nr_running))
4308 idle_balance(cpu, rq);
4309
4310 put_prev_task(rq, prev);
4311 next = pick_next_task(rq);
4312 clear_tsk_need_resched(prev);
4313 rq->skip_clock_update = 0;
4314
4315 if (likely(prev != next)) {
4316 rq->nr_switches++;
4317 rq->curr = next;
4318 ++*switch_count;
4319
4320 context_switch(rq, prev, next);
4321
4322
4323
4324
4325
4326
4327 cpu = smp_processor_id();
4328 rq = cpu_rq(cpu);
4329 } else
4330 raw_spin_unlock_irq(&rq->lock);
4331
4332 post_schedule(rq);
4333
4334 preempt_enable_no_resched();
4335 if (need_resched())
4336 goto need_resched;
4337}
4338
4339static inline void sched_submit_work(struct task_struct *tsk)
4340{
4341 if (!tsk->state)
4342 return;
4343
4344
4345
4346
4347 if (blk_needs_flush_plug(tsk))
4348 blk_schedule_flush_plug(tsk);
4349}
4350
4351asmlinkage void __sched schedule(void)
4352{
4353 struct task_struct *tsk = current;
4354
4355 sched_submit_work(tsk);
4356 __schedule();
4357}
4358EXPORT_SYMBOL(schedule);
4359
4360#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
4361
4362static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
4363{
4364 if (lock->owner != owner)
4365 return false;
4366
4367
4368
4369
4370
4371
4372
4373 barrier();
4374
4375 return owner->on_cpu;
4376}
4377
4378
4379
4380
4381
4382int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
4383{
4384 if (!sched_feat(OWNER_SPIN))
4385 return 0;
4386
4387 rcu_read_lock();
4388 while (owner_running(lock, owner)) {
4389 if (need_resched())
4390 break;
4391
4392 arch_mutex_cpu_relax();
4393 }
4394 rcu_read_unlock();
4395
4396
4397
4398
4399
4400
4401 return lock->owner == NULL;
4402}
4403#endif
4404
4405#ifdef CONFIG_PREEMPT
4406
4407
4408
4409
4410
4411asmlinkage void __sched notrace preempt_schedule(void)
4412{
4413 struct thread_info *ti = current_thread_info();
4414
4415
4416
4417
4418
4419 if (likely(ti->preempt_count || irqs_disabled()))
4420 return;
4421
4422 do {
4423 add_preempt_count_notrace(PREEMPT_ACTIVE);
4424 __schedule();
4425 sub_preempt_count_notrace(PREEMPT_ACTIVE);
4426
4427
4428
4429
4430
4431 barrier();
4432 } while (need_resched());
4433}
4434EXPORT_SYMBOL(preempt_schedule);
4435
4436
4437
4438
4439
4440
4441
4442asmlinkage void __sched preempt_schedule_irq(void)
4443{
4444 struct thread_info *ti = current_thread_info();
4445
4446
4447 BUG_ON(ti->preempt_count || !irqs_disabled());
4448
4449 do {
4450 add_preempt_count(PREEMPT_ACTIVE);
4451 local_irq_enable();
4452 __schedule();
4453 local_irq_disable();
4454 sub_preempt_count(PREEMPT_ACTIVE);
4455
4456
4457
4458
4459
4460 barrier();
4461 } while (need_resched());
4462}
4463
4464#endif
4465
4466int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
4467 void *key)
4468{
4469 return try_to_wake_up(curr->private, mode, wake_flags);
4470}
4471EXPORT_SYMBOL(default_wake_function);
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4483 int nr_exclusive, int wake_flags, void *key)
4484{
4485 wait_queue_t *curr, *next;
4486
4487 list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
4488 unsigned flags = curr->flags;
4489
4490 if (curr->func(curr, mode, wake_flags, key) &&
4491 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
4492 break;
4493 }
4494}
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506void __wake_up(wait_queue_head_t *q, unsigned int mode,
4507 int nr_exclusive, void *key)
4508{
4509 unsigned long flags;
4510
4511 spin_lock_irqsave(&q->lock, flags);
4512 __wake_up_common(q, mode, nr_exclusive, 0, key);
4513 spin_unlock_irqrestore(&q->lock, flags);
4514}
4515EXPORT_SYMBOL(__wake_up);
4516
4517
4518
4519
4520void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
4521{
4522 __wake_up_common(q, mode, 1, 0, NULL);
4523}
4524EXPORT_SYMBOL_GPL(__wake_up_locked);
4525
4526void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
4527{
4528 __wake_up_common(q, mode, 1, 0, key);
4529}
4530EXPORT_SYMBOL_GPL(__wake_up_locked_key);
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
4550 int nr_exclusive, void *key)
4551{
4552 unsigned long flags;
4553 int wake_flags = WF_SYNC;
4554
4555 if (unlikely(!q))
4556 return;
4557
4558 if (unlikely(!nr_exclusive))
4559 wake_flags = 0;
4560
4561 spin_lock_irqsave(&q->lock, flags);
4562 __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
4563 spin_unlock_irqrestore(&q->lock, flags);
4564}
4565EXPORT_SYMBOL_GPL(__wake_up_sync_key);
4566
4567
4568
4569
4570void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
4571{
4572 __wake_up_sync_key(q, mode, nr_exclusive, NULL);
4573}
4574EXPORT_SYMBOL_GPL(__wake_up_sync);
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588void complete(struct completion *x)
4589{
4590 unsigned long flags;
4591
4592 spin_lock_irqsave(&x->wait.lock, flags);
4593 x->done++;
4594 __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
4595 spin_unlock_irqrestore(&x->wait.lock, flags);
4596}
4597EXPORT_SYMBOL(complete);
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608void complete_all(struct completion *x)
4609{
4610 unsigned long flags;
4611
4612 spin_lock_irqsave(&x->wait.lock, flags);
4613 x->done += UINT_MAX/2;
4614 __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
4615 spin_unlock_irqrestore(&x->wait.lock, flags);
4616}
4617EXPORT_SYMBOL(complete_all);
4618
4619static inline long __sched
4620do_wait_for_common(struct completion *x, long timeout, int state)
4621{
4622 if (!x->done) {
4623 DECLARE_WAITQUEUE(wait, current);
4624
4625 __add_wait_queue_tail_exclusive(&x->wait, &wait);
4626 do {
4627 if (signal_pending_state(state, current)) {
4628 timeout = -ERESTARTSYS;
4629 break;
4630 }
4631 __set_current_state(state);
4632 spin_unlock_irq(&x->wait.lock);
4633 timeout = schedule_timeout(timeout);
4634 spin_lock_irq(&x->wait.lock);
4635 } while (!x->done && timeout);
4636 __remove_wait_queue(&x->wait, &wait);
4637 if (!x->done)
4638 return timeout;
4639 }
4640 x->done--;
4641 return timeout ?: 1;
4642}
4643
4644static long __sched
4645wait_for_common(struct completion *x, long timeout, int state)
4646{
4647 might_sleep();
4648
4649 spin_lock_irq(&x->wait.lock);
4650 timeout = do_wait_for_common(x, timeout, state);
4651 spin_unlock_irq(&x->wait.lock);
4652 return timeout;
4653}
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665void __sched wait_for_completion(struct completion *x)
4666{
4667 wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
4668}
4669EXPORT_SYMBOL(wait_for_completion);
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680unsigned long __sched
4681wait_for_completion_timeout(struct completion *x, unsigned long timeout)
4682{
4683 return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
4684}
4685EXPORT_SYMBOL(wait_for_completion_timeout);
4686
4687
4688
4689
4690
4691
4692
4693
4694int __sched wait_for_completion_interruptible(struct completion *x)
4695{
4696 long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
4697 if (t == -ERESTARTSYS)
4698 return t;
4699 return 0;
4700}
4701EXPORT_SYMBOL(wait_for_completion_interruptible);
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711long __sched
4712wait_for_completion_interruptible_timeout(struct completion *x,
4713 unsigned long timeout)
4714{
4715 return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
4716}
4717EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
4718
4719
4720
4721
4722
4723
4724
4725
4726int __sched wait_for_completion_killable(struct completion *x)
4727{
4728 long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
4729 if (t == -ERESTARTSYS)
4730 return t;
4731 return 0;
4732}
4733EXPORT_SYMBOL(wait_for_completion_killable);
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744long __sched
4745wait_for_completion_killable_timeout(struct completion *x,
4746 unsigned long timeout)
4747{
4748 return wait_for_common(x, timeout, TASK_KILLABLE);
4749}
4750EXPORT_SYMBOL(wait_for_completion_killable_timeout);
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764bool try_wait_for_completion(struct completion *x)
4765{
4766 unsigned long flags;
4767 int ret = 1;
4768
4769 spin_lock_irqsave(&x->wait.lock, flags);
4770 if (!x->done)
4771 ret = 0;
4772 else
4773 x->done--;
4774 spin_unlock_irqrestore(&x->wait.lock, flags);
4775 return ret;
4776}
4777EXPORT_SYMBOL(try_wait_for_completion);
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787bool completion_done(struct completion *x)
4788{
4789 unsigned long flags;
4790 int ret = 1;
4791
4792 spin_lock_irqsave(&x->wait.lock, flags);
4793 if (!x->done)
4794 ret = 0;
4795 spin_unlock_irqrestore(&x->wait.lock, flags);
4796 return ret;
4797}
4798EXPORT_SYMBOL(completion_done);
4799
4800static long __sched
4801sleep_on_common(wait_queue_head_t *q, int state, long timeout)
4802{
4803 unsigned long flags;
4804 wait_queue_t wait;
4805
4806 init_waitqueue_entry(&wait, current);
4807
4808 __set_current_state(state);
4809
4810 spin_lock_irqsave(&q->lock, flags);
4811 __add_wait_queue(q, &wait);
4812 spin_unlock(&q->lock);
4813 timeout = schedule_timeout(timeout);
4814 spin_lock_irq(&q->lock);
4815 __remove_wait_queue(q, &wait);
4816 spin_unlock_irqrestore(&q->lock, flags);
4817
4818 return timeout;
4819}
4820
4821void __sched interruptible_sleep_on(wait_queue_head_t *q)
4822{
4823 sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
4824}
4825EXPORT_SYMBOL(interruptible_sleep_on);
4826
4827long __sched
4828interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
4829{
4830 return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout);
4831}
4832EXPORT_SYMBOL(interruptible_sleep_on_timeout);
4833
4834void __sched sleep_on(wait_queue_head_t *q)
4835{
4836 sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
4837}
4838EXPORT_SYMBOL(sleep_on);
4839
4840long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
4841{
4842 return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout);
4843}
4844EXPORT_SYMBOL(sleep_on_timeout);
4845
4846#ifdef CONFIG_RT_MUTEXES
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858void rt_mutex_setprio(struct task_struct *p, int prio)
4859{
4860 int oldprio, on_rq, running;
4861 struct rq *rq;
4862 const struct sched_class *prev_class;
4863
4864 BUG_ON(prio < 0 || prio > MAX_PRIO);
4865
4866 rq = __task_rq_lock(p);
4867
4868 trace_sched_pi_setprio(p, prio);
4869 oldprio = p->prio;
4870 prev_class = p->sched_class;
4871 on_rq = p->on_rq;
4872 running = task_current(rq, p);
4873 if (on_rq)
4874 dequeue_task(rq, p, 0);
4875 if (running)
4876 p->sched_class->put_prev_task(rq, p);
4877
4878 if (rt_prio(prio))
4879 p->sched_class = &rt_sched_class;
4880 else
4881 p->sched_class = &fair_sched_class;
4882
4883 p->prio = prio;
4884
4885 if (running)
4886 p->sched_class->set_curr_task(rq);
4887 if (on_rq)
4888 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
4889
4890 check_class_changed(rq, p, prev_class, oldprio);
4891 __task_rq_unlock(rq);
4892}
4893
4894#endif
4895
4896void set_user_nice(struct task_struct *p, long nice)
4897{
4898 int old_prio, delta, on_rq;
4899 unsigned long flags;
4900 struct rq *rq;
4901
4902 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
4903 return;
4904
4905
4906
4907
4908 rq = task_rq_lock(p, &flags);
4909
4910
4911
4912
4913
4914
4915 if (task_has_rt_policy(p)) {
4916 p->static_prio = NICE_TO_PRIO(nice);
4917 goto out_unlock;
4918 }
4919 on_rq = p->on_rq;
4920 if (on_rq)
4921 dequeue_task(rq, p, 0);
4922
4923 p->static_prio = NICE_TO_PRIO(nice);
4924 set_load_weight(p);
4925 old_prio = p->prio;
4926 p->prio = effective_prio(p);
4927 delta = p->prio - old_prio;
4928
4929 if (on_rq) {
4930 enqueue_task(rq, p, 0);
4931
4932
4933
4934
4935 if (delta < 0 || (delta > 0 && task_running(rq, p)))
4936 resched_task(rq->curr);
4937 }
4938out_unlock:
4939 task_rq_unlock(rq, p, &flags);
4940}
4941EXPORT_SYMBOL(set_user_nice);
4942
4943
4944
4945
4946
4947
4948int can_nice(const struct task_struct *p, const int nice)
4949{
4950
4951 int nice_rlim = 20 - nice;
4952
4953 return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
4954 capable(CAP_SYS_NICE));
4955}
4956
4957#ifdef __ARCH_WANT_SYS_NICE
4958
4959
4960
4961
4962
4963
4964
4965
4966SYSCALL_DEFINE1(nice, int, increment)
4967{
4968 long nice, retval;
4969
4970
4971
4972
4973
4974
4975 if (increment < -40)
4976 increment = -40;
4977 if (increment > 40)
4978 increment = 40;
4979
4980 nice = TASK_NICE(current) + increment;
4981 if (nice < -20)
4982 nice = -20;
4983 if (nice > 19)
4984 nice = 19;
4985
4986 if (increment < 0 && !can_nice(current, nice))
4987 return -EPERM;
4988
4989 retval = security_task_setnice(current, nice);
4990 if (retval)
4991 return retval;
4992
4993 set_user_nice(current, nice);
4994 return 0;
4995}
4996
4997#endif
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007int task_prio(const struct task_struct *p)
5008{
5009 return p->prio - MAX_RT_PRIO;
5010}
5011
5012
5013
5014
5015
5016int task_nice(const struct task_struct *p)
5017{
5018 return TASK_NICE(p);
5019}
5020EXPORT_SYMBOL(task_nice);
5021
5022
5023
5024
5025
5026int idle_cpu(int cpu)
5027{
5028 return cpu_curr(cpu) == cpu_rq(cpu)->idle;
5029}
5030
5031
5032
5033
5034
5035struct task_struct *idle_task(int cpu)
5036{
5037 return cpu_rq(cpu)->idle;
5038}
5039
5040
5041
5042
5043
5044static struct task_struct *find_process_by_pid(pid_t pid)
5045{
5046 return pid ? find_task_by_vpid(pid) : current;
5047}
5048
5049
5050static void
5051__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
5052{
5053 p->policy = policy;
5054 p->rt_priority = prio;
5055 p->normal_prio = normal_prio(p);
5056
5057 p->prio = rt_mutex_getprio(p);
5058 if (rt_prio(p->prio))
5059 p->sched_class = &rt_sched_class;
5060 else
5061 p->sched_class = &fair_sched_class;
5062 set_load_weight(p);
5063}
5064
5065
5066
5067
5068static bool check_same_owner(struct task_struct *p)
5069{
5070 const struct cred *cred = current_cred(), *pcred;
5071 bool match;
5072
5073 rcu_read_lock();
5074 pcred = __task_cred(p);
5075 if (cred->user->user_ns == pcred->user->user_ns)
5076 match = (cred->euid == pcred->euid ||
5077 cred->euid == pcred->uid);
5078 else
5079 match = false;
5080 rcu_read_unlock();
5081 return match;
5082}
5083
/*
 * __sched_setscheduler - change the scheduling policy and/or RT priority
 * of task @p.
 * @p:      task to modify
 * @policy: new policy, optionally OR-ed with SCHED_RESET_ON_FORK; a
 *          negative value keeps the task's current policy and its
 *          reset-on-fork setting
 * @param:  supplies the new ->sched_priority
 * @user:   when true, the permission checks and the security hook apply
 *
 * Returns 0 on success, -EINVAL for an unknown policy, an invalid
 * policy/priority combination or when @p is the runqueue's stop task,
 * -EPERM when a permission check fails, or the error returned by
 * security_task_setscheduler().
 */
static int __sched_setscheduler(struct task_struct *p, int policy,
				const struct sched_param *param, bool user)
{
	int retval, oldprio, oldpolicy = -1, on_rq, running;
	unsigned long flags;
	const struct sched_class *prev_class;
	struct rq *rq;
	int reset_on_fork;

	/* Must not be called from interrupt context. */
	BUG_ON(in_interrupt());
recheck:
	/* A negative policy means "keep what the task has now". */
	if (policy < 0) {
		reset_on_fork = p->sched_reset_on_fork;
		policy = oldpolicy = p->policy;
	} else {
		/* Split the reset-on-fork flag from the policy proper. */
		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
		policy &= ~SCHED_RESET_ON_FORK;

		if (policy != SCHED_FIFO && policy != SCHED_RR &&
				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
				policy != SCHED_IDLE)
			return -EINVAL;
	}

	/*
	 * Priority range: 0..MAX_USER_RT_PRIO-1 for tasks with an mm,
	 * 0..MAX_RT_PRIO-1 for tasks without one.  RT policies require a
	 * nonzero priority; all other policies require priority 0.
	 */
	if (param->sched_priority < 0 ||
	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
		return -EINVAL;
	if (rt_policy(policy) != (param->sched_priority != 0))
		return -EINVAL;

	/* Restrictions for user requests made without CAP_SYS_NICE. */
	if (user && !capable(CAP_SYS_NICE)) {
		if (rt_policy(policy)) {
			unsigned long rlim_rtprio =
					task_rlimit(p, RLIMIT_RTPRIO);

			/* A zero RLIMIT_RTPRIO forbids changing the policy. */
			if (policy != p->policy && !rlim_rtprio)
				return -EPERM;

			/* The priority may be raised only up to the rlimit. */
			if (param->sched_priority > p->rt_priority &&
			    param->sched_priority > rlim_rtprio)
				return -EPERM;
		}

		/*
		 * Leaving SCHED_IDLE is allowed only if the task's current
		 * nice value passes can_nice().
		 */
		if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
			if (!can_nice(p, TASK_NICE(p)))
				return -EPERM;
		}

		/* The caller must own the target task. */
		if (!check_same_owner(p))
			return -EPERM;

		/* A set reset-on-fork flag cannot be cleared here. */
		if (p->sched_reset_on_fork && !reset_on_fork)
			return -EPERM;
	}

	if (user) {
		retval = security_task_setscheduler(p);
		if (retval)
			return retval;
	}

	/* The task's scheduling state is modified under task_rq_lock(). */
	rq = task_rq_lock(p, &flags);

	/* The runqueue's stop task may not be changed. */
	if (p == rq->stop) {
		task_rq_unlock(rq, p, &flags);
		return -EINVAL;
	}

	/* Nothing would change: unlock and report success. */
	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
			param->sched_priority == p->rt_priority))) {

		__task_rq_unlock(rq);
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		return 0;
	}

#ifdef CONFIG_RT_GROUP_SCHED
	if (user) {
		/*
		 * Refuse an RT policy when the task's group has zero RT
		 * runtime, unless that group is an autogroup.
		 */
		if (rt_bandwidth_enabled() && rt_policy(policy) &&
				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
				!task_group_is_autogroup(task_group(p))) {
			task_rq_unlock(rq, p, &flags);
			return -EPERM;
		}
	}
#endif

	/*
	 * The policy sampled before taking the lock no longer matches the
	 * task's policy: drop the lock and redo the checks from the start.
	 */
	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
		policy = oldpolicy = -1;
		task_rq_unlock(rq, p, &flags);
		goto recheck;
	}
	on_rq = p->on_rq;
	running = task_current(rq, p);
	/* Take the task off the runqueue while its parameters change. */
	if (on_rq)
		deactivate_task(rq, p, 0);
	if (running)
		p->sched_class->put_prev_task(rq, p);

	p->sched_reset_on_fork = reset_on_fork;

	/* Remember the old values for check_class_changed() below. */
	oldprio = p->prio;
	prev_class = p->sched_class;
	__setscheduler(rq, p, policy, param->sched_priority);

	/* Put the task back, now through its (possibly new) class. */
	if (running)
		p->sched_class->set_curr_task(rq);
	if (on_rq)
		activate_task(rq, p, 0);

	check_class_changed(rq, p, prev_class, oldprio);
	task_rq_unlock(rq, p, &flags);

	/* Called after the locks have been released. */
	rt_mutex_adjust_pi(p);

	return 0;
}
5238
5239
5240
5241
5242
5243
5244
5245
5246
5247int sched_setscheduler(struct task_struct *p, int policy,
5248 const struct sched_param *param)
5249{
5250 return __sched_setscheduler(p, policy, param, true);
5251}
5252EXPORT_SYMBOL_GPL(sched_setscheduler);
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265int sched_setscheduler_nocheck(struct task_struct *p, int policy,
5266 const struct sched_param *param)
5267{
5268 return __sched_setscheduler(p, policy, param, false);
5269}
5270
5271static int
5272do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5273{
5274 struct sched_param lparam;
5275 struct task_struct *p;
5276 int retval;
5277
5278 if (!param || pid < 0)
5279 return -EINVAL;
5280 if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
5281 return -EFAULT;
5282
5283 rcu_read_lock();
5284 retval = -ESRCH;
5285 p = find_process_by_pid(pid);
5286 if (p != NULL)
5287 retval = sched_setscheduler(p, policy, &lparam);
5288 rcu_read_unlock();
5289
5290 return retval;
5291}
5292
5293
5294
5295
5296
5297
5298
5299SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
5300 struct sched_param __user *, param)
5301{
5302
5303 if (policy < 0)
5304 return -EINVAL;
5305
5306 return do_sched_setscheduler(pid, policy, param);
5307}
5308
5309
5310
5311
5312
5313
5314SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
5315{
5316 return do_sched_setscheduler(pid, -1, param);
5317}
5318
5319
5320
5321
5322
5323SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
5324{
5325 struct task_struct *p;
5326 int retval;
5327
5328 if (pid < 0)
5329 return -EINVAL;
5330
5331 retval = -ESRCH;
5332 rcu_read_lock();
5333 p = find_process_by_pid(pid);
5334 if (p) {
5335 retval = security_task_getscheduler(p);
5336 if (!retval)
5337 retval = p->policy
5338 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
5339 }
5340 rcu_read_unlock();
5341 return retval;
5342}
5343
5344
5345
5346
5347
5348
5349SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
5350{
5351 struct sched_param lp;
5352 struct task_struct *p;
5353 int retval;
5354
5355 if (!param || pid < 0)
5356 return -EINVAL;
5357
5358 rcu_read_lock();
5359 p = find_process_by_pid(pid);
5360 retval = -ESRCH;
5361 if (!p)
5362 goto out_unlock;
5363
5364 retval = security_task_getscheduler(p);
5365 if (retval)
5366 goto out_unlock;
5367
5368 lp.sched_priority = p->rt_priority;
5369 rcu_read_unlock();
5370
5371
5372
5373
5374 retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
5375
5376 return retval;
5377
5378out_unlock:
5379 rcu_read_unlock();
5380 return retval;
5381}
5382
5383long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5384{
5385 cpumask_var_t cpus_allowed, new_mask;
5386 struct task_struct *p;
5387 int retval;
5388
5389 get_online_cpus();
5390 rcu_read_lock();
5391
5392 p = find_process_by_pid(pid);
5393 if (!p) {
5394 rcu_read_unlock();
5395 put_online_cpus();
5396 return -ESRCH;
5397 }
5398
5399
5400 get_task_struct(p);
5401 rcu_read_unlock();
5402
5403 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
5404 retval = -ENOMEM;
5405 goto out_put_task;
5406 }
5407 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
5408 retval = -ENOMEM;
5409 goto out_free_cpus_allowed;
5410 }
5411 retval = -EPERM;
5412 if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
5413 goto out_unlock;
5414
5415 retval = security_task_setscheduler(p);
5416 if (retval)
5417 goto out_unlock;
5418
5419 cpuset_cpus_allowed(p, cpus_allowed);
5420 cpumask_and(new_mask, in_mask, cpus_allowed);
5421again:
5422 retval = set_cpus_allowed_ptr(p, new_mask);
5423
5424 if (!retval) {
5425 cpuset_cpus_allowed(p, cpus_allowed);
5426 if (!cpumask_subset(new_mask, cpus_allowed)) {
5427
5428
5429
5430
5431
5432 cpumask_copy(new_mask, cpus_allowed);
5433 goto again;
5434 }
5435 }
5436out_unlock:
5437 free_cpumask_var(new_mask);
5438out_free_cpus_allowed:
5439 free_cpumask_var(cpus_allowed);
5440out_put_task:
5441 put_task_struct(p);
5442 put_online_cpus();
5443 return retval;
5444}
5445
5446static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5447 struct cpumask *new_mask)
5448{
5449 if (len < cpumask_size())
5450 cpumask_clear(new_mask);
5451 else if (len > cpumask_size())
5452 len = cpumask_size();
5453
5454 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
5455}
5456
5457
5458
5459
5460
5461
5462
5463SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
5464 unsigned long __user *, user_mask_ptr)
5465{
5466 cpumask_var_t new_mask;
5467 int retval;
5468
5469 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
5470 return -ENOMEM;
5471
5472 retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
5473 if (retval == 0)
5474 retval = sched_setaffinity(pid, new_mask);
5475 free_cpumask_var(new_mask);
5476 return retval;
5477}
5478
5479long sched_getaffinity(pid_t pid, struct cpumask *mask)
5480{
5481 struct task_struct *p;
5482 unsigned long flags;
5483 int retval;
5484
5485 get_online_cpus();
5486 rcu_read_lock();
5487
5488 retval = -ESRCH;
5489 p = find_process_by_pid(pid);
5490 if (!p)
5491 goto out_unlock;
5492
5493 retval = security_task_getscheduler(p);
5494 if (retval)
5495 goto out_unlock;
5496
5497 raw_spin_lock_irqsave(&p->pi_lock, flags);
5498 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
5499 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5500
5501out_unlock:
5502 rcu_read_unlock();
5503 put_online_cpus();
5504
5505 return retval;
5506}
5507
5508
5509
5510
5511
5512
5513
5514SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
5515 unsigned long __user *, user_mask_ptr)
5516{
5517 int ret;
5518 cpumask_var_t mask;
5519
5520 if ((len * BITS_PER_BYTE) < nr_cpu_ids)
5521 return -EINVAL;
5522 if (len & (sizeof(unsigned long)-1))
5523 return -EINVAL;
5524
5525 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
5526 return -ENOMEM;
5527
5528 ret = sched_getaffinity(pid, mask);
5529 if (ret == 0) {
5530 size_t retlen = min_t(size_t, len, cpumask_size());
5531
5532 if (copy_to_user(user_mask_ptr, mask, retlen))
5533 ret = -EFAULT;
5534 else
5535 ret = retlen;
5536 }
5537 free_cpumask_var(mask);
5538
5539 return ret;
5540}
5541
5542
5543
5544
5545
5546
5547
/*
 * sys_sched_yield - yield the CPU on behalf of the calling task.
 *
 * Invokes the yield_task() hook of the caller's scheduling class with
 * the runqueue locked, then calls schedule().  Always returns 0.
 */
SYSCALL_DEFINE0(sched_yield)
{
	struct rq *rq = this_rq_lock();

	schedstat_inc(rq, yld_count);
	current->sched_class->yield_task(rq);

	/*
	 * The runqueue lock is released by hand and preemption re-enabled
	 * with the no_resched variant; schedule() is called right after.
	 */
	__release(rq->lock);
	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
	do_raw_spin_unlock(&rq->lock);
	preempt_enable_no_resched();

	schedule();

	return 0;
}
5568
5569static inline int should_resched(void)
5570{
5571 return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
5572}
5573
/*
 * __cond_resched - call __schedule() with PREEMPT_ACTIVE added to the
 * preempt count for the duration of the call.
 */
static void __cond_resched(void)
{
	add_preempt_count(PREEMPT_ACTIVE);
	__schedule();
	sub_preempt_count(PREEMPT_ACTIVE);
}
5580
5581int __sched _cond_resched(void)
5582{
5583 if (should_resched()) {
5584 __cond_resched();
5585 return 1;
5586 }
5587 return 0;
5588}
5589EXPORT_SYMBOL(_cond_resched);
5590
5591
5592
5593
5594
5595
5596
5597
5598
5599int __cond_resched_lock(spinlock_t *lock)
5600{
5601 int resched = should_resched();
5602 int ret = 0;
5603
5604 lockdep_assert_held(lock);
5605
5606 if (spin_needbreak(lock) || resched) {
5607 spin_unlock(lock);
5608 if (resched)
5609 __cond_resched();
5610 else
5611 cpu_relax();
5612 ret = 1;
5613 spin_lock(lock);
5614 }
5615 return ret;
5616}
5617EXPORT_SYMBOL(__cond_resched_lock);
5618
5619int __sched __cond_resched_softirq(void)
5620{
5621 BUG_ON(!in_softirq());
5622
5623 if (should_resched()) {
5624 local_bh_enable();
5625 __cond_resched();
5626 local_bh_disable();
5627 return 1;
5628 }
5629 return 0;
5630}
5631EXPORT_SYMBOL(__cond_resched_softirq);
5632
5633
5634
5635
5636
5637
5638
/*
 * yield - yield the current processor to other tasks.
 *
 * Marks the current task TASK_RUNNING and then goes through the
 * sched_yield() system call implementation.
 */
void __sched yield(void)
{
	set_current_state(TASK_RUNNING);
	sys_sched_yield();
}
5644EXPORT_SYMBOL(yield);
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658bool __sched yield_to(struct task_struct *p, bool preempt)
5659{
5660 struct task_struct *curr = current;
5661 struct rq *rq, *p_rq;
5662 unsigned long flags;
5663 bool yielded = 0;
5664
5665 local_irq_save(flags);
5666 rq = this_rq();
5667
5668again:
5669 p_rq = task_rq(p);
5670 double_rq_lock(rq, p_rq);
5671 while (task_rq(p) != p_rq) {
5672 double_rq_unlock(rq, p_rq);
5673 goto again;
5674 }
5675
5676 if (!curr->sched_class->yield_to_task)
5677 goto out;
5678
5679 if (curr->sched_class != p->sched_class)
5680 goto out;
5681
5682 if (task_running(p_rq, p) || p->state)
5683 goto out;
5684
5685 yielded = curr->sched_class->yield_to_task(rq, p, preempt);
5686 if (yielded) {
5687 schedstat_inc(rq, yld_count);
5688
5689
5690
5691
5692 if (preempt && rq != p_rq)
5693 resched_task(p_rq->curr);
5694 }
5695
5696out:
5697 double_rq_unlock(rq, p_rq);
5698 local_irq_restore(flags);
5699
5700 if (yielded)
5701 schedule();
5702
5703 return yielded;
5704}
5705EXPORT_SYMBOL_GPL(yield_to);
5706
5707
5708
5709
5710
5711void __sched io_schedule(void)
5712{
5713 struct rq *rq = raw_rq();
5714
5715 delayacct_blkio_start();
5716 atomic_inc(&rq->nr_iowait);
5717 blk_flush_plug(current);
5718 current->in_iowait = 1;
5719 schedule();
5720 current->in_iowait = 0;
5721 atomic_dec(&rq->nr_iowait);
5722 delayacct_blkio_end();
5723}
5724EXPORT_SYMBOL(io_schedule);
5725
5726long __sched io_schedule_timeout(long timeout)
5727{
5728 struct rq *rq = raw_rq();
5729 long ret;
5730
5731 delayacct_blkio_start();
5732 atomic_inc(&rq->nr_iowait);
5733 blk_flush_plug(current);
5734 current->in_iowait = 1;
5735 ret = schedule_timeout(timeout);
5736 current->in_iowait = 0;
5737 atomic_dec(&rq->nr_iowait);
5738 delayacct_blkio_end();
5739 return ret;
5740}
5741
5742
5743
5744
5745
5746
5747
5748
5749SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
5750{
5751 int ret = -EINVAL;
5752
5753 switch (policy) {
5754 case SCHED_FIFO:
5755 case SCHED_RR:
5756 ret = MAX_USER_RT_PRIO-1;
5757 break;
5758 case SCHED_NORMAL:
5759 case SCHED_BATCH:
5760 case SCHED_IDLE:
5761 ret = 0;
5762 break;
5763 }
5764 return ret;
5765}
5766
5767
5768
5769
5770
5771
5772
5773
5774SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5775{
5776 int ret = -EINVAL;
5777
5778 switch (policy) {
5779 case SCHED_FIFO:
5780 case SCHED_RR:
5781 ret = 1;
5782 break;
5783 case SCHED_NORMAL:
5784 case SCHED_BATCH:
5785 case SCHED_IDLE:
5786 ret = 0;
5787 }
5788 return ret;
5789}
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5800 struct timespec __user *, interval)
5801{
5802 struct task_struct *p;
5803 unsigned int time_slice;
5804 unsigned long flags;
5805 struct rq *rq;
5806 int retval;
5807 struct timespec t;
5808
5809 if (pid < 0)
5810 return -EINVAL;
5811
5812 retval = -ESRCH;
5813 rcu_read_lock();
5814 p = find_process_by_pid(pid);
5815 if (!p)
5816 goto out_unlock;
5817
5818 retval = security_task_getscheduler(p);
5819 if (retval)
5820 goto out_unlock;
5821
5822 rq = task_rq_lock(p, &flags);
5823 time_slice = p->sched_class->get_rr_interval(rq, p);
5824 task_rq_unlock(rq, p, &flags);
5825
5826 rcu_read_unlock();
5827 jiffies_to_timespec(time_slice, &t);
5828 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
5829 return retval;
5830
5831out_unlock:
5832 rcu_read_unlock();
5833 return retval;
5834}
5835
5836static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
5837
5838void sched_show_task(struct task_struct *p)
5839{
5840 unsigned long free = 0;
5841 unsigned state;
5842
5843 state = p->state ? __ffs(p->state) + 1 : 0;
5844 printk(KERN_INFO "%-15.15s %c", p->comm,
5845 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
5846#if BITS_PER_LONG == 32
5847 if (state == TASK_RUNNING)
5848 printk(KERN_CONT " running ");
5849 else
5850 printk(KERN_CONT " %08lx ", thread_saved_pc(p));
5851#else
5852 if (state == TASK_RUNNING)
5853 printk(KERN_CONT " running task ");
5854 else
5855 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
5856#endif
5857#ifdef CONFIG_DEBUG_STACK_USAGE
5858 free = stack_not_used(p);
5859#endif
5860 printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
5861 task_pid_nr(p), task_pid_nr(p->real_parent),
5862 (unsigned long)task_thread_info(p)->flags);
5863
5864 show_stack(p, NULL);
5865}
5866
/*
 * show_state_filter - print a summary of every thread whose state
 * matches @state_filter.
 * @state_filter: mask of task state bits to show; 0 shows all threads
 *
 * With a zero filter, debug_show_all_locks() is called as well.
 */
void show_state_filter(unsigned long state_filter)
{
	struct task_struct *g, *p;

	/* Column header for the lines printed by sched_show_task(). */
#if BITS_PER_LONG == 32
	printk(KERN_INFO
		" task PC stack pid father\n");
#else
	printk(KERN_INFO
		" task PC stack pid father\n");
#endif
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		/*
		 * Touch the NMI watchdog on every iteration.
		 * NOTE(review): presumably because the dump can take long
		 * enough to trip it - confirm.
		 */
		touch_nmi_watchdog();
		if (!state_filter || (p->state & state_filter))
			sched_show_task(p);
	} while_each_thread(g, p);

	touch_all_softlockup_watchdogs();

#ifdef CONFIG_SCHED_DEBUG
	sysrq_sched_debug_show();
#endif
	read_unlock(&tasklist_lock);

	/* Only dump lock information when showing all tasks. */
	if (!state_filter)
		debug_show_all_locks();
}
5901
/*
 * init_idle_bootup_task - put @idle into the idle scheduling class.
 */
void __cpuinit init_idle_bootup_task(struct task_struct *idle)
{
	idle->sched_class = &idle_sched_class;
}
5906
5907
5908
5909
5910
5911
5912
5913
5914
/*
 * init_idle - set up @idle as the idle task of @cpu.
 * @idle: task that becomes the idle task
 * @cpu:  CPU whose runqueue receives it
 *
 * Under the runqueue lock the task is re-initialised with
 * __sched_fork(), marked TASK_RUNNING, bound to @cpu and installed as
 * both ->curr and ->idle of that runqueue.
 */
void __cpuinit init_idle(struct task_struct *idle, int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);

	__sched_fork(idle);
	idle->state = TASK_RUNNING;
	idle->se.exec_start = sched_clock();

	/* Restrict the idle task to @cpu only. */
	do_set_cpus_allowed(idle, cpumask_of(cpu));

	/*
	 * NOTE(review): rq->lock is already held here; the RCU read-side
	 * section around __set_task_cpu() presumably exists to satisfy a
	 * check inside that helper - confirm.
	 */
	rcu_read_lock();
	__set_task_cpu(idle, cpu);
	rcu_read_unlock();

	rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP)
	idle->on_cpu = 1;
#endif
	raw_spin_unlock_irqrestore(&rq->lock, flags);

	/* The preempt count is reset after the runqueue lock is dropped. */
	task_thread_info(idle)->preempt_count = 0;

	/* The idle scheduling class is assigned last. */
	idle->sched_class = &idle_sched_class;
	ftrace_graph_init_idle_task(idle, cpu);
}
5956
5957
5958
5959
5960
5961
5962
5963
/*
 * Global CPU mask.
 * NOTE(review): going by the name, presumably the set of CPUs that have
 * stopped their periodic tick - confirm against the users of this mask.
 */
cpumask_var_t nohz_cpu_mask;
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975static int get_update_sysctl_factor(void)
5976{
5977 unsigned int cpus = min_t(int, num_online_cpus(), 8);
5978 unsigned int factor;
5979
5980 switch (sysctl_sched_tunable_scaling) {
5981 case SCHED_TUNABLESCALING_NONE:
5982 factor = 1;
5983 break;
5984 case SCHED_TUNABLESCALING_LINEAR:
5985 factor = cpus;
5986 break;
5987 case SCHED_TUNABLESCALING_LOG:
5988 default:
5989 factor = 1 + ilog2(cpus);
5990 break;
5991 }
5992
5993 return factor;
5994}
5995
5996static void update_sysctl(void)
5997{
5998 unsigned int factor = get_update_sysctl_factor();
5999
6000#define SET_SYSCTL(name) \
6001 (sysctl_##name = (factor) * normalized_sysctl_##name)
6002 SET_SYSCTL(sched_min_granularity);
6003 SET_SYSCTL(sched_latency);
6004 SET_SYSCTL(sched_wakeup_granularity);
6005#undef SET_SYSCTL
6006}
6007
/*
 * sched_init_granularity - initialise the scaled scheduler tunables by
 * running update_sysctl().
 */
static inline void sched_init_granularity(void)
{
	update_sysctl();
}
6012
6013#ifdef CONFIG_SMP
6014void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
6015{
6016 if (p->sched_class && p->sched_class->set_cpus_allowed)
6017 p->sched_class->set_cpus_allowed(p, new_mask);
6018 else {
6019 cpumask_copy(&p->cpus_allowed, new_mask);
6020 p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
6021 }
6022}
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
6048{
6049 unsigned long flags;
6050 struct rq *rq;
6051 unsigned int dest_cpu;
6052 int ret = 0;
6053
6054 rq = task_rq_lock(p, &flags);
6055
6056 if (cpumask_equal(&p->cpus_allowed, new_mask))
6057 goto out;
6058
6059 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
6060 ret = -EINVAL;
6061 goto out;
6062 }
6063
6064 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
6065 ret = -EINVAL;
6066 goto out;
6067 }
6068
6069 do_set_cpus_allowed(p, new_mask);
6070
6071
6072 if (cpumask_test_cpu(task_cpu(p), new_mask))
6073 goto out;
6074
6075 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
6076 if (p->on_rq) {
6077 struct migration_arg arg = { p, dest_cpu };
6078
6079 task_rq_unlock(rq, p, &flags);
6080 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
6081 tlb_migrate_finish(p->mm);
6082 return 0;
6083 }
6084out:
6085 task_rq_unlock(rq, p, &flags);
6086
6087 return ret;
6088}
6089EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
/*
 * __migrate_task - move task @p from @src_cpu to @dest_cpu.
 *
 * Returns 0 when @dest_cpu is not active or is not in @p's allowed
 * mask.  Returns 1 otherwise, which includes the case where @p is no
 * longer on @src_cpu and nothing is moved.
 * Interrupts are not disabled here (plain raw_spin_lock is used).
 */
static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
{
	struct rq *rq_dest, *rq_src;
	int ret = 0;

	if (unlikely(!cpu_active(dest_cpu)))
		return ret;

	rq_src = cpu_rq(src_cpu);
	rq_dest = cpu_rq(dest_cpu);

	/* Lock order: the task's pi_lock first, then both runqueues. */
	raw_spin_lock(&p->pi_lock);
	double_rq_lock(rq_src, rq_dest);

	/* The task already left @src_cpu: reported as success. */
	if (task_cpu(p) != src_cpu)
		goto done;

	/* The destination is not allowed for this task: fail. */
	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
		goto fail;

	/* Only a queued task is actually moved between the runqueues. */
	if (p->on_rq) {
		deactivate_task(rq_src, p, 0);
		set_task_cpu(p, dest_cpu);
		activate_task(rq_dest, p, 0);
		check_preempt_curr(rq_dest, p, 0);
	}
done:
	ret = 1;
fail:
	double_rq_unlock(rq_src, rq_dest);
	raw_spin_unlock(&p->pi_lock);
	return ret;
}
6139
6140
6141
6142
6143
6144
6145static int migration_cpu_stop(void *data)
6146{
6147 struct migration_arg *arg = data;
6148
6149
6150
6151
6152
6153 local_irq_disable();
6154 __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
6155 local_irq_enable();
6156 return 0;
6157}
6158
6159#ifdef CONFIG_HOTPLUG_CPU
6160
6161
6162
6163
6164
6165void idle_task_exit(void)
6166{
6167 struct mm_struct *mm = current->active_mm;
6168
6169 BUG_ON(cpu_online(smp_processor_id()));
6170
6171 if (mm != &init_mm)
6172 switch_mm(mm, &init_mm, current);
6173 mmdrop(mm);
6174}
6175
6176
6177
6178
6179
6180
6181
6182
6183static void migrate_nr_uninterruptible(struct rq *rq_src)
6184{
6185 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
6186
6187 rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
6188 rq_src->nr_uninterruptible = 0;
6189}
6190
6191
6192
6193
6194static void calc_gl