1
2
3
4
5
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/err.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>

#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
39
/* Defined outside this file. */
extern int linux_num_cpus;
extern void calibrate_delay(void);

/* Hardware id of the boot processor; recorded by smp_tick_init(). */
static unsigned char boot_cpu_id;

/* CPUs that are fully up; cross calls are restricted to this set. */
cpumask_t cpu_online_map = CPU_MASK_NONE;
/* CPUs found by smp_prepare_cpus() that we are willing to bring up. */
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
/* Set by __cpu_up() to release a CPU that is spinning in smp_callin(). */
static cpumask_t smp_commenced_mask;
/* CPUs that smp_boot_one_cpu() has asked the PROM to start. */
static cpumask_t cpu_callout_map;
50
51void smp_info(struct seq_file *m)
52{
53 int i;
54
55 seq_printf(m, "State:\n");
56 for (i = 0; i < NR_CPUS; i++) {
57 if (cpu_online(i))
58 seq_printf(m,
59 "CPU%d:\t\tonline\n", i);
60 }
61}
62
63void smp_bogo(struct seq_file *m)
64{
65 int i;
66
67 for (i = 0; i < NR_CPUS; i++)
68 if (cpu_online(i))
69 seq_printf(m,
70 "Cpu%dBogo\t: %lu.%02lu\n"
71 "Cpu%dClkTck\t: %016lx\n",
72 i, cpu_data(i).udelay_val / (500000/HZ),
73 (cpu_data(i).udelay_val / (5000/HZ)) % 100,
74 i, cpu_data(i).clock_tick);
75}
76
77void __init smp_store_cpu_info(int id)
78{
79 int cpu_node;
80
81
82
83 cpu_data(id).udelay_val = loops_per_jiffy;
84
85 cpu_find_by_mid(id, &cpu_node);
86 cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
87 "clock-frequency", 0);
88
89 cpu_data(id).pgcache_size = 0;
90 cpu_data(id).pte_cache[0] = NULL;
91 cpu_data(id).pte_cache[1] = NULL;
92 cpu_data(id).pgdcache_size = 0;
93 cpu_data(id).pgd_cache = NULL;
94 cpu_data(id).idle_volume = 1;
95}
96
static void smp_setup_percpu_timer(void);

/* Handshake flag: cleared by smp_boot_one_cpu(), set by the new CPU below. */
static volatile unsigned long callin_flag = 0;

extern void inherit_locked_prom_mappings(int save_p);

/*
 * Runs on a freshly started secondary CPU.  Sets up its TLB state and
 * per-cpu timer, calibrates the delay loop, reports in to the boot CPU via
 * callin_flag, then waits to be released before marking itself online.
 */
void __init smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	inherit_locked_prom_mappings(0);

	__flush_tlb_all();

	smp_setup_percpu_timer();

	local_irq_enable();

	calibrate_delay();
	smp_store_cpu_info(cpuid);
	/* Tell smp_boot_one_cpu(), which polls this flag, that we are alive. */
	callin_flag = 1;
	/* NOTE(review): presumably forces the store above out before going on. */
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");

	clear_thread_flag(TIF_NEWCHILD);

	/* Attach to the init_mm, holding a reference on it. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	/* Spin until __cpu_up() sets our bit in smp_commenced_mask. */
	while (!cpu_isset(cpuid, smp_commenced_mask))
		membar("#LoadLoad");

	cpu_set(cpuid, cpu_online_map);
}
135
/*
 * Log the current CPU and panic.  The message indicates this is meant to be
 * reached only if cpu_idle() ever returns.
 */
void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}
141
/*
 * Tick offset handed to tick_ops (init_tick/add_compare/add_tick).
 * Set in smp_tick_init() and rescaled by setup_profiling_timer().
 */
static unsigned long current_tick_offset;

/*
 * %tick synchronisation state.
 *
 * go[MASTER] and go[SLAVE] are the two mailbox words used for the
 * handshake between smp_synchronize_one_tick() and get_delta().  SLAVE is
 * placed SMP_CACHE_BYTES past MASTER.
 * NOTE(review): presumably so the two words sit in different cache lines.
 */
#define MASTER 0
#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))

/* Adjustment rounds per sync, and round-trip samples taken per round. */
#define NUM_ROUNDS 64
#define NUM_ITERS 5

static DEFINE_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

/* Set to 1 to record and print per-round statistics. */
#define DEBUG_TICK_SYNC 0
161
162static inline long get_delta (long *rt, long *master)
163{
164 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
165 unsigned long tcenter, t0, t1, tm;
166 unsigned long i;
167
168 for (i = 0; i < NUM_ITERS; i++) {
169 t0 = tick_ops->get_tick();
170 go[MASTER] = 1;
171 membar("#StoreLoad");
172 while (!(tm = go[SLAVE]))
173 membar("#LoadLoad");
174 go[SLAVE] = 0;
175 membar("#StoreStore");
176 t1 = tick_ops->get_tick();
177
178 if (t1 - t0 < best_t1 - best_t0)
179 best_t0 = t0, best_t1 = t1, best_tm = tm;
180 }
181
182 *rt = best_t1 - best_t0;
183 *master = best_tm - best_t0;
184
185
186 tcenter = (best_t0/2 + best_t1/2);
187 if (best_t0 % 2 + best_t1 % 2 == 2)
188 tcenter++;
189 return tcenter - best_tm;
190}
191
192void smp_synchronize_tick_client(void)
193{
194 long i, delta, adj, adjust_latency = 0, done = 0;
195 unsigned long flags, rt, master_time_stamp, bound;
196#if DEBUG_TICK_SYNC
197 struct {
198 long rt;
199 long master;
200 long diff;
201 long lat;
202 } t[NUM_ROUNDS];
203#endif
204
205 go[MASTER] = 1;
206
207 while (go[MASTER])
208 membar("#LoadLoad");
209
210 local_irq_save(flags);
211 {
212 for (i = 0; i < NUM_ROUNDS; i++) {
213 delta = get_delta(&rt, &master_time_stamp);
214 if (delta == 0) {
215 done = 1;
216 bound = rt;
217 }
218
219 if (!done) {
220 if (i > 0) {
221 adjust_latency += -delta;
222 adj = -delta + adjust_latency/4;
223 } else
224 adj = -delta;
225
226 tick_ops->add_tick(adj, current_tick_offset);
227 }
228#if DEBUG_TICK_SYNC
229 t[i].rt = rt;
230 t[i].master = master_time_stamp;
231 t[i].diff = delta;
232 t[i].lat = adjust_latency/4;
233#endif
234 }
235 }
236 local_irq_restore(flags);
237
238#if DEBUG_TICK_SYNC
239 for (i = 0; i < NUM_ROUNDS; i++)
240 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
241 t[i].rt, t[i].master, t[i].diff, t[i].lat);
242#endif
243
244 printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles,"
245 "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);
246}
247
248static void smp_start_sync_tick_client(int cpu);
249
250static void smp_synchronize_one_tick(int cpu)
251{
252 unsigned long flags, i;
253
254 go[MASTER] = 0;
255
256 smp_start_sync_tick_client(cpu);
257
258
259 while (!go[MASTER])
260 membar("#LoadLoad");
261
262
263 go[MASTER] = 0;
264 membar("#StoreLoad");
265
266 spin_lock_irqsave(&itc_sync_lock, flags);
267 {
268 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
269 while (!go[MASTER])
270 membar("#LoadLoad");
271 go[MASTER] = 0;
272 membar("#StoreStore");
273 go[SLAVE] = tick_ops->get_tick();
274 membar("#StoreLoad");
275 }
276 }
277 spin_unlock_irqrestore(&itc_sync_lock, flags);
278}
279
280extern unsigned long sparc64_cpu_startup;
281
282
283
284
285
286static struct thread_info *cpu_new_thread = NULL;
287
288static int __devinit smp_boot_one_cpu(unsigned int cpu)
289{
290 unsigned long entry =
291 (unsigned long)(&sparc64_cpu_startup);
292 unsigned long cookie =
293 (unsigned long)(&cpu_new_thread);
294 struct task_struct *p;
295 int timeout, ret, cpu_node;
296
297 p = fork_idle(cpu);
298 callin_flag = 0;
299 cpu_new_thread = p->thread_info;
300 cpu_set(cpu, cpu_callout_map);
301
302 cpu_find_by_mid(cpu, &cpu_node);
303 prom_startcpu(cpu_node, entry, cookie);
304
305 for (timeout = 0; timeout < 5000000; timeout++) {
306 if (callin_flag)
307 break;
308 udelay(100);
309 }
310 if (callin_flag) {
311 ret = 0;
312 } else {
313 printk("Processor %d is stuck.\n", cpu);
314 cpu_clear(cpu, cpu_callout_map);
315 ret = -ENODEV;
316 }
317 cpu_new_thread = NULL;
318
319 return ret;
320}
321
/*
 * Send one cross-call "mondo" (three data words) to a single CPU on
 * Spitfire-class hardware.
 *
 * @data0..@data2: payload words written to the interrupt dispatch registers.
 * @pstate:        caller's %pstate value; it is XORed with PSTATE_IE for
 *                 the dispatch and written back unchanged afterwards.
 * @cpu:           target CPU; remapped to a different id on Starfire.
 *
 * The dispatch is retried after a 2us delay until the status register reads
 * zero.  If the low status bit stays set for 100000 polls the attempt is
 * abandoned with a log message.
 */
static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* Shuffle the cpu number bits into the Starfire target id. */
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	/*
	 * wrpr XORs its two source operands, so this flips PSTATE_IE
	 * (assumes the caller had interrupts enabled -- TODO confirm).
	 * The three data words go to offsets 0x40, 0x50 and 0x60 of
	 * ASI_INTR_W, then a store to 'target' starts the dispatch.
	 */
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr %1, %2, %%pstate\n\t"
	"stxa %4, [%0] %3\n\t"
	"stxa %5, [%0+%8] %3\n\t"
	"add %0, %8, %0\n\t"
	"stxa %6, [%0+%8] %3\n\t"
	"membar #Sync\n\t"
	"stxa %%g0, [%7] %3\n\t"
	"membar #Sync\n\t"
	"mov 0x20, %%g1\n\t"
	"ldxa [%%g1] 0x7f, %%g0\n\t"
	"membar #Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

	/* Poll the dispatch status: zero means the mondo was accepted. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	/* Restore the caller's pstate before logging or delaying. */
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
	} else {
		/* Low bit clear but status non-zero: back off and resend. */
		udelay(2);
		goto again;
	}
}
387
388static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
389{
390 u64 pstate;
391 int i;
392
393 __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
394 for_each_cpu_mask(i, mask)
395 spitfire_xcall_helper(data0, data1, data2, pstate, i);
396}
397
398
399
400
401
/*
 * Deliver a three-word cross call to every CPU in @mask on Cheetah-class
 * hardware, dispatching to all targets before polling for completion.
 *
 * CPUs whose dispatch completed are removed from the local copy of @mask
 * and the remainder are retried.  The attempt is abandoned with a log
 * message if the poll loop ends with none of the odd status bits set.
 */
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jalapeno;

	if (cpus_empty(mask))
		return;

	/*
	 * The upper half of %ver identifies the chip.  The "jalapeno" value
	 * selects a different target encoding and status-bit layout below.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jalapeno = ((ver >> 32) == 0x003e0016);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	/* wrpr XORs its operands: flip PSTATE_IE for the dispatch. */
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	/* Load the three outgoing data words. */
	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     "membar #Sync\n\t"
			     :
			     : "r" (data0), "r" (data1), "r" (data2),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	{
		int i;

		/* Start a dispatch to each target still in the mask. */
		for_each_cpu_mask(i, mask) {
			u64 target = (i << 14) | 0x70;

			/* Off jalapeno, each dispatch gets its own slot id. */
			if (!is_jalapeno)
				target |= (nack_busy_id << 24);
			__asm__ __volatile__(
				"stxa %%g0, [%0] %1\n\t"
				"membar #Sync\n\t"
				:
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
		}
	}

	/* Now poll for completion of everything we dispatched. */
	{
		u64 dispatch_stat;
		long stuck;

		stuck = 100000 * nack_busy_id;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (dispatch_stat == 0UL) {
				/* Everything was accepted. */
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			if (!--stuck)
				break;
			/* Even bits: presumably per-slot "busy" flags. */
		} while (dispatch_stat & 0x5555555555555555UL);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
			/*
			 * No odd (presumably NACK) bit is set, so we left
			 * the loop on the stuck counter: give up.
			 */
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			/* Back off in proportion to the number of targets. */
			udelay(2 * nack_busy_id);

			/*
			 * Drop every CPU whose odd status bit is clear, so
			 * only the refused ones are retried.
			 */
			for_each_cpu_mask(i, mask) {
				u64 check_mask;

				if (is_jalapeno)
					check_mask = (0x2UL << (2*i));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_clear(i, mask);
				this_busy_nack += 2;
			}

			goto retry;
		}
	}
}
507
508
509
510
511static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
512{
513 u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
514 int this_cpu = get_cpu();
515
516 cpus_and(mask, mask, cpu_online_map);
517 cpu_clear(this_cpu, mask);
518
519 if (tlb_type == spitfire)
520 spitfire_xcall_deliver(data0, data1, data2, mask);
521 else
522 cheetah_xcall_deliver(data0, data1, data2, mask);
523
524
525 put_cpu();
526}
527
528extern unsigned long xcall_sync_tick;
529
530static void smp_start_sync_tick_client(int cpu)
531{
532 cpumask_t mask = cpumask_of_cpu(cpu);
533
534 smp_cross_call_masked(&xcall_sync_tick,
535 0, 0, 0, mask);
536}
537
538
/* Send the cross call to all online CPUs (the sender is excluded). */
#define smp_cross_call(func, ctx, data1, data2) \
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)

/* Request descriptor shared between smp_call_function() and its clients. */
struct call_data_struct {
	void (*func) (void *info);	/* function to run on each remote CPU */
	void *info;			/* argument passed to func */
	atomic_t finished;		/* incremented once by every client */
	int wait;			/* non-zero: count only after func returns */
};

/* call_data is a single global slot; call_lock serialises its users. */
static DEFINE_SPINLOCK(call_lock);
static struct call_data_struct *call_data;

extern unsigned long xcall_call_function;
553
554
555
556
557
558int smp_call_function(void (*func)(void *info), void *info,
559 int nonatomic, int wait)
560{
561 struct call_data_struct data;
562 int cpus = num_online_cpus() - 1;
563 long timeout;
564
565 if (!cpus)
566 return 0;
567
568
569 WARN_ON(irqs_disabled());
570
571 data.func = func;
572 data.info = info;
573 atomic_set(&data.finished, 0);
574 data.wait = wait;
575
576 spin_lock(&call_lock);
577
578 call_data = &data;
579
580 smp_cross_call(&xcall_call_function, 0, 0, 0);
581
582
583
584
585
586 timeout = 1000000;
587 while (atomic_read(&data.finished) != cpus) {
588 if (--timeout <= 0)
589 goto out_timeout;
590 barrier();
591 udelay(1);
592 }
593
594 spin_unlock(&call_lock);
595
596 return 0;
597
598out_timeout:
599 spin_unlock(&call_lock);
600 printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n",
601 (long) num_online_cpus() - 1L,
602 (long) atomic_read(&data.finished));
603 return 0;
604}
605
606void smp_call_function_client(int irq, struct pt_regs *regs)
607{
608 void (*func) (void *info) = call_data->func;
609 void *info = call_data->info;
610
611 clear_softint(1 << irq);
612 if (call_data->wait) {
613
614 func(info);
615 atomic_inc(&call_data->finished);
616 } else {
617
618 atomic_inc(&call_data->finished);
619 func(info);
620 }
621}
622
/*
 * Cross-call handler entry points, defined outside this file.  Only their
 * addresses are used here, as the 'func' word of a cross call.
 */
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_flush_tlb_all_spitfire;
extern unsigned long xcall_flush_tlb_all_cheetah;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_flush_dcache_page_cheetah;
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
/* Debug counters: all D-cache page flushes, and those sent as cross calls. */
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif
637
638static __inline__ void __local_flush_dcache_page(struct page *page)
639{
640#if (L1DCACHE_SIZE > PAGE_SIZE)
641 __flush_dcache_page(page_address(page),
642 ((tlb_type == spitfire) &&
643 page_mapping(page) != NULL));
644#else
645 if (page_mapping(page) != NULL &&
646 tlb_type == spitfire)
647 __flush_icache_page(__pa(page_address(page)));
648#endif
649}
650
651void smp_flush_dcache_page_impl(struct page *page, int cpu)
652{
653 cpumask_t mask = cpumask_of_cpu(cpu);
654 int this_cpu = get_cpu();
655
656#ifdef CONFIG_DEBUG_DCFLUSH
657 atomic_inc(&dcpage_flushes);
658#endif
659 if (cpu == this_cpu) {
660 __local_flush_dcache_page(page);
661 } else if (cpu_online(cpu)) {
662 void *pg_addr = page_address(page);
663 u64 data0;
664
665 if (tlb_type == spitfire) {
666 data0 =
667 ((u64)&xcall_flush_dcache_page_spitfire);
668 if (page_mapping(page) != NULL)
669 data0 |= ((u64)1 << 32);
670 spitfire_xcall_deliver(data0,
671 __pa(pg_addr),
672 (u64) pg_addr,
673 mask);
674 } else {
675 data0 =
676 ((u64)&xcall_flush_dcache_page_cheetah);
677 cheetah_xcall_deliver(data0,
678 __pa(pg_addr),
679 0, mask);
680 }
681#ifdef CONFIG_DEBUG_DCFLUSH
682 atomic_inc(&dcpage_flushes_xcall);
683#endif
684 }
685
686 put_cpu();
687}
688
689void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
690{
691 void *pg_addr = page_address(page);
692 cpumask_t mask = cpu_online_map;
693 u64 data0;
694 int this_cpu = get_cpu();
695
696 cpu_clear(this_cpu, mask);
697
698#ifdef CONFIG_DEBUG_DCFLUSH
699 atomic_inc(&dcpage_flushes);
700#endif
701 if (cpus_empty(mask))
702 goto flush_self;
703 if (tlb_type == spitfire) {
704 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
705 if (page_mapping(page) != NULL)
706 data0 |= ((u64)1 << 32);
707 spitfire_xcall_deliver(data0,
708 __pa(pg_addr),
709 (u64) pg_addr,
710 mask);
711 } else {
712 data0 = ((u64)&xcall_flush_dcache_page_cheetah);
713 cheetah_xcall_deliver(data0,
714 __pa(pg_addr),
715 0, mask);
716 }
717#ifdef CONFIG_DEBUG_DCFLUSH
718 atomic_inc(&dcpage_flushes_xcall);
719#endif
720 flush_self:
721 __local_flush_dcache_page(page);
722
723 put_cpu();
724}
725
726void smp_receive_signal(int cpu)
727{
728 cpumask_t mask = cpumask_of_cpu(cpu);
729
730 if (cpu_online(cpu)) {
731 u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
732
733 if (tlb_type == spitfire)
734 spitfire_xcall_deliver(data0, 0, 0, mask);
735 else
736 cheetah_xcall_deliver(data0, 0, 0, mask);
737 }
738}
739
/*
 * Receiving side of smp_receive_signal(): only acknowledges the soft
 * interrupt.  @regs is unused.
 * NOTE(review): presumably the pending work is picked up on the interrupt
 * return path; confirm.
 */
void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	/* Just return, rtrap takes care of the rest. */
	clear_softint(1 << irq);
}
745
/* Cross-call all other online CPUs with the xcall_report_regs handler. */
void smp_report_regs(void)
{
	smp_cross_call(&xcall_report_regs, 0, 0, 0);
}
750
751void smp_flush_tlb_all(void)
752{
753 if (tlb_type == spitfire)
754 smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0);
755 else
756 smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0);
757 __flush_tlb_all();
758}
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798void smp_flush_tlb_mm(struct mm_struct *mm)
799{
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814 if (atomic_read(&mm->mm_users) == 0)
815 return;
816
817 {
818 u32 ctx = CTX_HWBITS(mm->context);
819 int cpu = get_cpu();
820
821 if (atomic_read(&mm->mm_users) == 1) {
822 mm->cpu_vm_mask = cpumask_of_cpu(cpu);
823 goto local_flush_and_out;
824 }
825
826 smp_cross_call_masked(&xcall_flush_tlb_mm,
827 ctx, 0, 0,
828 mm->cpu_vm_mask);
829
830 local_flush_and_out:
831 __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
832
833 put_cpu();
834 }
835}
836
837void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
838{
839 u32 ctx = CTX_HWBITS(mm->context);
840 int cpu = get_cpu();
841
842 if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
843 mm->cpu_vm_mask = cpumask_of_cpu(cpu);
844 goto local_flush_and_out;
845 } else {
846
847
848
849
850
851
852
853 if (0) {
854 cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
855
856
857
858
859
860 if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
861 goto local_flush_and_out;
862 }
863 }
864
865 smp_cross_call_masked(&xcall_flush_tlb_pending,
866 ctx, nr, (unsigned long) vaddrs,
867 mm->cpu_vm_mask);
868
869local_flush_and_out:
870 __flush_tlb_pending(ctx, nr, vaddrs);
871
872 put_cpu();
873}
874
875void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
876{
877 start &= PAGE_MASK;
878 end = PAGE_ALIGN(end);
879 if (start != end) {
880 smp_cross_call(&xcall_flush_tlb_kernel_range,
881 0, start, end);
882
883 __flush_tlb_kernel_range(start, end);
884 }
885}
886
887
888
889extern unsigned long xcall_capture;
890
891static atomic_t smp_capture_depth = ATOMIC_INIT(0);
892static atomic_t smp_capture_registry = ATOMIC_INIT(0);
893static unsigned long penguins_are_doing_time;
894
895void smp_capture(void)
896{
897 int result = atomic_add_ret(1, &smp_capture_depth);
898
899 if (result == 1) {
900 int ncpus = num_online_cpus();
901
902#ifdef CAPTURE_DEBUG
903 printk("CPU[%d]: Sending penguins to jail...",
904 smp_processor_id());
905#endif
906 penguins_are_doing_time = 1;
907 membar("#StoreStore | #LoadStore");
908 atomic_inc(&smp_capture_registry);
909 smp_cross_call(&xcall_capture, 0, 0, 0);
910 while (atomic_read(&smp_capture_registry) != ncpus)
911 membar("#LoadLoad");
912#ifdef CAPTURE_DEBUG
913 printk("done\n");
914#endif
915 }
916}
917
918void smp_release(void)
919{
920 if (atomic_dec_and_test(&smp_capture_depth)) {
921#ifdef CAPTURE_DEBUG
922 printk("CPU[%d]: Giving pardon to "
923 "imprisoned penguins\n",
924 smp_processor_id());
925#endif
926 penguins_are_doing_time = 0;
927 membar("#StoreStore | #StoreLoad");
928 atomic_dec(&smp_capture_registry);
929 }
930}
931
932
933
934
extern void prom_world(int);
extern void save_alternate_globals(unsigned long *);
extern void restore_alternate_globals(unsigned long *);
/*
 * Holding cell for a captured CPU.  Checks in to smp_capture_registry,
 * spins until penguins_are_doing_time is cleared by smp_release(), then
 * checks out again.  The alternate globals are saved and restored around
 * the spin, which runs between prom_world(1) and prom_world(0).  @regs is
 * unused.
 */
void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	unsigned long global_save[24];

	clear_softint(1 << irq);

	preempt_disable();

	/* Push all register windows out to the stack first. */
	__asm__ __volatile__("flushw");
	save_alternate_globals(global_save);
	prom_world(1);
	/* Tell smp_capture() that this CPU is now parked. */
	atomic_inc(&smp_capture_registry);
	membar("#StoreLoad | #StoreStore");
	while (penguins_are_doing_time)
		membar("#LoadLoad");
	restore_alternate_globals(global_save);
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}
959
extern unsigned long xcall_promstop;

/* Cross-call all other online CPUs with the xcall_promstop handler. */
void smp_promstop_others(void)
{
	smp_cross_call(&xcall_promstop, 0, 0, 0);
}
966
/* Per-cpu profiling divider (reload value) and its running countdown. */
#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier
#define prof_counter(__cpu) cpu_data(__cpu).counter

/*
 * Per-cpu tick interrupt handler.
 *
 * If the tick soft interrupt is not pending, the interrupt is handed to
 * handler_irq() as level 14.  Otherwise each pass records a profiling
 * tick.  Every prof_multiplier-th pass also does the regular timer work:
 * update_process_times() on every CPU, plus timer_tick_interrupt() on the
 * boot CPU.  The next compare value is then programmed, and the loop
 * repeats if the tick has already passed it.
 */
void smp_percpu_timer_interrupt(struct pt_regs *regs)
{
	unsigned long compare, tick, pstate;
	int cpu = smp_processor_id();
	int user = user_mode(regs);

	/* Check that this really is a tick interrupt before handling it. */
	{
		unsigned long tick_mask = tick_ops->softint_mask;

		if (!(get_softint() & tick_mask)) {
			extern void handler_irq(int, struct pt_regs *);

			handler_irq(14, regs);
			return;
		}
		clear_softint(tick_mask);
	}

	do {
		profile_tick(CPU_PROFILING, regs);
		if (!--prof_counter(cpu)) {
			irq_enter();

			/* Only the boot CPU runs timer_tick_interrupt(). */
			if (cpu == boot_cpu_id) {
				kstat_this_cpu.irqs[0]++;
				timer_tick_interrupt(regs);
			}

			update_process_times(user);

			irq_exit();

			prof_counter(cpu) = prof_multiplier(cpu);
		}

		/*
		 * wrpr XORs its operands, so this flips PSTATE_IE while the
		 * compare register is updated and the tick is sampled.
		 */
		__asm__ __volatile__("rdpr %%pstate, %0\n\t"
				     "wrpr %0, %1, %%pstate"
				     : "=r" (pstate)
				     : "i" (PSTATE_IE));

		compare = tick_ops->add_compare(current_tick_offset);
		tick = tick_ops->get_tick();

		/* Restore the original %pstate. */
		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     :
				     : "r" (pstate));
	} while (time_after_eq(tick, compare));
}
1025
/*
 * Initialise the calling CPU's tick timer: reset its profiling counter and
 * multiplier to 1, and program the first tick with current_tick_offset
 * while PSTATE_IE is flipped.
 */
static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	prof_counter(cpu) = prof_multiplier(cpu) = 1;

	/* wrpr XORs its operands: flip PSTATE_IE around the timer setup. */
	__asm__ __volatile__("rdpr %%pstate, %0\n\t"
			     "wrpr %0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));

	tick_ops->init_tick(current_tick_offset);

	/* Restore the original %pstate. */
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     :
			     : "r" (pstate));
}
1048
1049void __init smp_tick_init(void)
1050{
1051 boot_cpu_id = hard_smp_processor_id();
1052 current_tick_offset = timer_tick_offset;
1053
1054 cpu_set(boot_cpu_id, cpu_online_map);
1055 prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
1056}
1057
1058cycles_t cacheflush_time;
1059unsigned long cache_decay_ticks;
1060
1061extern unsigned long cheetah_tune_scheduling(void);
1062
1063static void __init smp_tune_scheduling(void)
1064{
1065 unsigned long orig_flush_base, flush_base, flags, *p;
1066 unsigned int ecache_size, order;
1067 cycles_t tick1, tick2, raw;
1068 int cpu_node;
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 printk("SMP: Calibrating ecache flush... ");
1081 if (tlb_type == cheetah || tlb_type == cheetah_plus) {
1082 cacheflush_time = cheetah_tune_scheduling();
1083 goto report;
1084 }
1085
1086 cpu_find_by_instance(0, &cpu_node, NULL);
1087 ecache_size = prom_getintdefault(cpu_node,
1088 "ecache-size", (512 * 1024));
1089 if (ecache_size > (4 * 1024 * 1024))
1090 ecache_size = (4 * 1024 * 1024);
1091 orig_flush_base = flush_base =
1092 __get_free_pages(GFP_KERNEL, order = get_order(ecache_size));
1093
1094 if (flush_base != 0UL) {
1095 local_irq_save(flags);
1096
1097
1098
1099
1100 for (p = (unsigned long *)flush_base;
1101 ((unsigned long)p) < (flush_base + (ecache_size<<1));
1102 p += (64 / sizeof(unsigned long)))
1103 *((volatile unsigned long *)p);
1104
1105 tick1 = tick_ops->get_tick();
1106
1107 __asm__ __volatile__("1:\n\t"
1108 "ldx [%0 + 0x000], %%g1\n\t"
1109 "ldx [%0 + 0x040], %%g2\n\t"
1110 "ldx [%0 + 0x080], %%g3\n\t"
1111 "ldx [%0 + 0x0c0], %%g5\n\t"
1112 "add %0, 0x100, %0\n\t"
1113 "cmp %0, %2\n\t"
1114 "bne,pt %%xcc, 1b\n\t"
1115 " nop"
1116 : "=&r" (flush_base)
1117 : "0" (flush_base),
1118 "r" (flush_base + ecache_size)
1119 : "g1", "g2", "g3", "g5");
1120
1121 tick2 = tick_ops->get_tick();
1122
1123 local_irq_restore(flags);
1124
1125 raw = (tick2 - tick1);
1126
1127
1128
1129
1130 cacheflush_time = (raw - (raw >> 2));
1131
1132 free_pages(orig_flush_base, order);
1133 } else {
1134 cacheflush_time = ((ecache_size << 2) +
1135 (ecache_size << 1));
1136 }
1137report:
1138
1139 cache_decay_ticks = cacheflush_time / timer_tick_offset;
1140 if (cache_decay_ticks < 1)
1141 cache_decay_ticks = 1;
1142
1143 printk("Using heuristic of %ld cycles, %ld ticks.\n",
1144 cacheflush_time, cache_decay_ticks);
1145}
1146
1147
1148static DEFINE_SPINLOCK(prof_setup_lock);
1149
1150int setup_profiling_timer(unsigned int multiplier)
1151{
1152 unsigned long flags;
1153 int i;
1154
1155 if ((!multiplier) || (timer_tick_offset / multiplier) < 1000)
1156 return -EINVAL;
1157
1158 spin_lock_irqsave(&prof_setup_lock, flags);
1159 for (i = 0; i < NR_CPUS; i++)
1160 prof_multiplier(i) = multiplier;
1161 current_tick_offset = (timer_tick_offset / multiplier);
1162 spin_unlock_irqrestore(&prof_setup_lock, flags);
1163
1164 return 0;
1165}
1166
1167void __init smp_prepare_cpus(unsigned int max_cpus)
1168{
1169 int instance, mid;
1170
1171 instance = 0;
1172 while (!cpu_find_by_instance(instance, NULL, &mid)) {
1173 if (mid < max_cpus)
1174 cpu_set(mid, phys_cpu_present_map);
1175 instance++;
1176 }
1177
1178 if (num_possible_cpus() > max_cpus) {
1179 instance = 0;
1180 while (!cpu_find_by_instance(instance, NULL, &mid)) {
1181 if (mid != boot_cpu_id) {
1182 cpu_clear(mid, phys_cpu_present_map);
1183 if (num_possible_cpus() <= max_cpus)
1184 break;
1185 }
1186 instance++;
1187 }
1188 }
1189
1190 smp_store_cpu_info(boot_cpu_id);
1191}
1192
1193void __devinit smp_prepare_boot_cpu(void)
1194{
1195 if (hard_smp_processor_id() >= NR_CPUS) {
1196 prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
1197 prom_halt();
1198 }
1199
1200 current_thread_info()->cpu = hard_smp_processor_id();
1201 cpu_set(smp_processor_id(), cpu_online_map);
1202 cpu_set(smp_processor_id(), phys_cpu_present_map);
1203}
1204
1205int __devinit __cpu_up(unsigned int cpu)
1206{
1207 int ret = smp_boot_one_cpu(cpu);
1208
1209 if (!ret) {
1210 cpu_set(cpu, smp_commenced_mask);
1211 while (!cpu_isset(cpu, cpu_online_map))
1212 mb();
1213 if (!cpu_isset(cpu, cpu_online_map)) {
1214 ret = -ENODEV;
1215 } else {
1216 smp_synchronize_one_tick(cpu);
1217 }
1218 }
1219 return ret;
1220}
1221
1222void __init smp_cpus_done(unsigned int max_cpus)
1223{
1224 unsigned long bogosum = 0;
1225 int i;
1226
1227 for (i = 0; i < NR_CPUS; i++) {
1228 if (cpu_online(i))
1229 bogosum += cpu_data(i).udelay_val;
1230 }
1231 printk("Total of %ld processors activated "
1232 "(%lu.%02lu BogoMIPS).\n",
1233 (long) num_online_cpus(),
1234 bogosum/(500000/HZ),
1235 (bogosum/(5000/HZ))%100);
1236
1237
1238
1239
1240 smp_tune_scheduling();
1241}
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
/*
 * Ask @cpu to reschedule.  A cross call is sent only when that CPU's
 * idle_volume is zero.
 * NOTE(review): presumably a non-zero idle_volume means the CPU is polling
 * in its idle loop and will notice need_resched without an interrupt;
 * confirm against cpu_idle().
 */
void smp_send_reschedule(int cpu)
{
	if (cpu_data(cpu).idle_volume == 0)
		smp_receive_signal(cpu);
}
1258
1259
1260
1261
/* Intentionally empty: no action is taken to stop the other CPUs here. */
void smp_send_stop(void)
{
}
1265
1266