// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>
/*
 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
 *
 * @ptr: the current pi/ci value
 * @val: the amount to add
 *
 * Add val to ptr. It can go until twice the queue length.
 */
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
	ptr += val;
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}

static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	return atomic_read(ci) & ((queue_len << 1) - 1);
}

static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
	else
		return (abs(delta) - queue_len);
}
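
/*
 * Worked example of the double-length index scheme above (illustrative
 * numbers only, not part of the submission flow): with a queue length of 8,
 * pi and ci run from 0 to 15 and wrap. If pi == 10 and ci == 3, delta == 7
 * and the queue has 8 - 7 == 1 free slot. If pi has wrapped (pi == 1,
 * ci == 11), delta is negative and the free count is abs(-10) - 8 == 2.
 * Keeping the indices in the range [0, 2 * queue_len) lets a completely
 * full queue (pi == ci + queue_len) be distinguished from a completely
 * empty one (pi == ci).
 */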

void hl_hw_queue_update_ci(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_hw_queue *q;
	int i;

	if (hdev->disabled)
		return;

	q = &hdev->kernel_queues[0];

	/* There are no internal queues if H/W queues are being used */
	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
		return;

	/* We must increment CI for every queue that will never get a
	 * completion. There are two scenarios in which this can happen:
	 * 1. All queues of a non-completion CS will never get a completion.
	 * 2. Internal queues never get a completion.
	 */
	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
}

/*
 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external
 *                                or a H/W queue.
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
 * @len: BD's length
 * @ptr: BD's pointer
 *
 * This function assumes there is enough space in the queue to submit a new
 * BD to it. It initializes the next BD and calls the device specific
 * function to set the pi (and ring the doorbell).
 *
 * Must be called with the H/W queues lock held.
 */
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

	bd = q->kernel_address;
	bd += hl_pi_2_offset(q->pi);
	bd->ctl = cpu_to_le32(ctl);
	bd->len = cpu_to_le32(len);
	bd->ptr = cpu_to_le64(ptr);

	q->pi = hl_queue_inc_ptr(q->pi);
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}
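
/*
 * For example (illustrative values only): submitting a 64-byte command
 * buffer whose DMA address is 0x1000 to a queue whose pi is currently 5
 * fills the BD at offset hl_pi_2_offset(5) with the given ctl, len = 64 and
 * ptr = 0x1000 (all little-endian), advances pi to 6 and then rings the
 * queue's doorbell with the new pi so the device starts fetching the BD.
 */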

/*
 * ext_queue_sanity_checks - perform some sanity checks on external queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 * @reserve_cq_entry: whether to reserve entries in the completion queue
 *
 * H/W queues spinlock should be taken before calling this function.
 *
 * Perform the following:
 * - Make sure we have enough space in the H/W queue
 * - Make sure we have enough space in the completion queue
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work). Only
 *   do this action if reserve_cq_entry is true.
 */
static int ext_queue_sanity_checks(struct hl_device *hdev,
				struct hl_hw_queue *q, int num_of_entries,
				bool reserve_cq_entry)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->cq_id].free_slots_cnt;
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	if (reserve_cq_entry) {
		/*
		 * Check we have enough space in the completion queue.
		 * Subtract num_of_entries from the free slots counter; if the
		 * counter turns negative there is no room for this
		 * submission, so undo the subtraction and bail out.
		 */
		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
				num_of_entries, q->hw_queue_id);
			atomic_add(num_of_entries, free_slots);
			return -EAGAIN;
		}
	}

	return 0;
}
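
/*
 * Example of the CQ reservation above (illustrative numbers only): with
 * free_slots_cnt == 3 and num_of_entries == 5, atomic_add_negative(-5, ...)
 * drives the counter to -2, so the reservation is undone (counter back to 3)
 * and -EAGAIN is returned. With 8 free slots the counter simply drops to 3
 * and those entries stay reserved until the corresponding completions are
 * processed or the submission is unrolled.
 */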

/*
 * int_queue_sanity_checks - perform some sanity checks on internal queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 *
 * H/W queues spinlock should be taken before calling this function.
 *
 * Perform the following:
 * - Make sure we have enough space in the H/W queue
 */
static int int_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	if (num_of_entries > q->int_queue_len) {
		dev_err(hdev->dev,
			"Cannot populate queue %u with %u jobs\n",
			q->hw_queue_id, num_of_entries);
		return -ENOMEM;
	}

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, q->int_queue_len);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hw_queue_sanity_checks() - Make sure we have enough space in the H/W queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Notice: We do not reserve queue entries, so this function must not be
 *         called more than once per CS for the same queue.
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not necessarily a JOB)
 *                               without completion
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the queue to submit the CB to
 * @cb_size: size of CB
 * @cb_ptr: pointer to CB location
 *
 * This function sends a single CB, which must NOT generate a completion
 * entry.
 */
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc = 0;

	/*
	 * The CPU queue is a synchronous queue with an effective depth of
	 * a single entry (although it is allocated with room for multiple
	 * entries). Therefore, there is no point in locking the access to
	 * the queue.
	 */
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_lock(hdev);

	if (hdev->disabled) {
		rc = -EPERM;
		goto out;
	}

	/*
	 * This function is called for H/W queue types only during the init
	 * phase, when the queues are empty and being tested, so there is no
	 * need for sanity checks on those queues.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken.
 */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Update the JOB ID inside the BD CTL so the device would know what
	 * to write in the completion queue.
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	/* Skip the completion flow in case this is a non-completion CS */
	if (!cs_needs_completion(job->cs))
		goto submit_bd;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex.
	 *
	 * No need to check if the CQ is full because it was already
	 * checked in ext_queue_sanity_checks.
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

submit_bd:
	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken.
 */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);

	if (job->is_kernel_allocated_cb)
		/* A kernel-allocated CB is a real CB object, so use the
		 * device-accessible bus address of its buffer.
		 */
		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
	else
		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken.
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * Encode the completion information in the BD control word: the
	 * offset of this CS in the pending-CS array (its sequence modulo
	 * max_pending_cs) together with the current PI.
	 */
	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * A patched CB is created only if a user CB was allocated by the
	 * driver and the MMU is disabled. If the MMU is enabled, the user CB
	 * should be used instead of the patched CB. Otherwise, user_cb
	 * already holds a device address.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
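
/*
 * For example (illustrative numbers only): with max_pending_cs == 64, a CS
 * whose sequence number is 70 gets completion offset 70 & 63 == 6, i.e. the
 * same slot a CS with sequence 6 would use. The power-of-two mask turns the
 * modulo reduction into a single AND.
 */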

static int init_signal_cs(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *hw_sob;
	u32 q_idx;
	int rc = 0;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	cs_cmpl->hw_sob = hw_sob;
	cs_cmpl->sob_val = prop->next_sob_val;

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);

	/* We set an EB since we must make sure all operations are done
	 * when sending the signal.
	 */
	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
				cs_cmpl->hw_sob->sob_id, 0, true);

	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);

	return rc;
}

static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_cs_compl *signal_cs_cmpl;
	struct hl_sync_stream_properties *prop;
	struct hl_gen_wait_properties wait_prop;
	u32 q_idx;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	signal_cs_cmpl = container_of(cs->signal_fence,
					struct hl_cs_compl,
					base_fence);

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	dev_dbg(hdev->dev,
		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->base_mon_id, q_idx);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->base_mon_id;
	wait_prop.q_idx = q_idx;
	wait_prop.size = 0;
	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);

	kref_get(&cs_cmpl->hw_sob->kref);

	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

/*
 * init_signal_wait_cs - initialize a signal/wait CS
 * @cs: pointer to the signal/wait CS
 *
 * H/W queues spinlock should be taken before calling this function.
 */
static int init_signal_wait_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job;
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);
	int rc = 0;

	/* There is only one job in a signal/wait CS */
	job = list_first_entry(&cs->job_list, struct hl_cs_job,
				cs_node);

	if (cs->type & CS_TYPE_SIGNAL)
		rc = init_signal_cs(hdev, job, cs_cmpl);
	else if (cs->type & CS_TYPE_WAIT)
		init_wait_cs(hdev, cs, job, cs_cmpl);

	return rc;
}

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	enum hl_device_status status;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	int rc = 0, i, cq_cnt;
	bool first_entry;
	u32 max_queues;

	cntr = &hdev->aggregated_cs_counters;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (!hl_device_operational(hdev, &status)) {
		atomic64_inc(&cntr->device_in_reset_drop_cnt);
		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
		dev_err(hdev->dev,
			"device is %s, CS rejected!\n", hdev->status[status]);
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i],
						cs_needs_completion(cs) ?
								true : false);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				atomic64_inc(
					&ctx->cs_counters.queue_full_drop_cnt);
				atomic64_inc(&cntr->queue_full_drop_cnt);
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
		rc = init_signal_wait_cs(cs);
		if (rc) {
			dev_err(hdev->dev, "Failed to submit signal cs\n");
			goto unroll_cq_resv;
		}
	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
		hdev->asic_funcs->collective_wait_init_cs(cs);

	spin_lock(&hdev->cs_mirror_lock);

	/* Verify staged CS exists and add to the staged list */
	if (cs->staged_cs && !cs->staged_first) {
		struct hl_cs *staged_cs;

		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (!staged_cs) {
			dev_err(hdev->dev,
				"Cannot find staged submission sequence %llu",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		if (is_staged_cs_last_exists(hdev, staged_cs)) {
			dev_err(hdev->dev,
				"Staged submission sequence %llu already submitted",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
	}

	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	first_entry = list_first_entry(&hdev->cs_mirror_list,
					struct hl_cs, mirror_node) == cs;
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
				first_entry && cs_needs_timeout(cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unlock_cs_mirror:
	spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
	/* Return the CQ entries reserved above for this CS */
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
				(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
 *
 * @hdev: pointer to habanalabs device structure
 * @hw_queue_id: which queue to increment its ci
 */
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

	atomic_inc(&q->ci);
}

static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;

	if (is_cpu_queue)
		p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_QUEUE_SIZE_IN_BYTES,
							&q->bus_address);
	else
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
					sizeof(*q->shadow_queue),
					GFP_KERNEL);
	if (!q->shadow_queue) {
		dev_err(hdev->dev,
			"Failed to allocate shadow queue for H/W queue %d\n",
			q->hw_queue_id);
		rc = -ENOMEM;
		goto free_queue;
	}

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;

free_queue:
	if (is_cpu_queue)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);

	return rc;
}

static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
					&q->bus_address, &q->int_queue_len);
	if (!p) {
		dev_err(hdev->dev,
			"Failed to get base address for internal queue %d\n",
			q->hw_queue_id);
		return -EFAULT;
	}

	q->kernel_address = p;
	q->pi = 0;
	atomic_set(&q->ci, 0);

	return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
}

static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *sync_stream_prop;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, reserved_mon_idx, queue_idx;

	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	/* 'collective_mon_idx' is used as a running index to reserve
	 * monitors for collective queues: a collective master queue reserves
	 * HL_COLLECTIVE_RSVD_MSTR_MONS monitors, a collective slave queue
	 * reserves a single monitor.
	 */
	if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_MASTER) {
		reserved_mon_idx = hdev->collective_mon_idx;

		/* reserve the first monitor for collective master queue */
		sync_stream_prop->collective_mstr_mon_id[0] =
			prop->collective_first_mon + reserved_mon_idx;

		/* reserve the second monitor for collective master queue */
		sync_stream_prop->collective_mstr_mon_id[1] =
			prop->collective_first_mon + reserved_mon_idx + 1;

		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
	} else if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_SLAVE) {
		reserved_mon_idx = hdev->collective_mon_idx++;

		/* reserve a monitor for collective slave queue */
		sync_stream_prop->collective_slave_mon_id =
			prop->collective_first_mon + reserved_mon_idx;
	}

	if (!hdev->kernel_queues[q_idx].supports_sync_stream)
		return;

	queue_idx = hdev->sync_stream_queue_idx++;

	sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
			(queue_idx * HL_RSVD_SOBS);
	sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
			(queue_idx * HL_RSVD_MONS);
	sync_stream_prop->next_sob_val = 1;
	sync_stream_prop->curr_sob_offset = 0;

	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &sync_stream_prop->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *prop =
			&hdev->kernel_queues[q_idx].sync_stream_prop;

	/*
	 * In case we got here due to a stuck CS, the refcnt might be bigger
	 * than 1, so reset it back to 1 along with the SOB bookkeeping.
	 */
	kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
	prop->curr_sob_offset = 0;
	prop->next_sob_val = 1;
}

/*
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 * @hw_queue_id: The id of the H/W queue
 *
 * Allocate dma-able memory for the queue and initialize fields.
 * Returns 0 on success.
 */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_NA:
		q->valid = 0;
		return 0;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	sync_stream_queue_init(hdev, q->hw_queue_id);

	if (rc)
		return rc;

	q->valid = 1;

	return 0;
}

/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	if (!q->valid)
		return;

	/*
	 * If we arrived here, there are no jobs waiting on this queue
	 * so we can safely remove it.
	 * This is because this function can only be called when:
	 * 1. Either a context is deleted, which only can occur if all its
	 *    jobs were finished
	 * 2. A context wasn't able to be created due to failure or timeout,
	 *    which means there are no jobs on the queue yet
	 *
	 * The only exception are the queues of the kernel context, but
	 * if they are being destroyed, it means that the entire module is
	 * being removed. If the module is removed, it means there is no open
	 * user context. It also means that if a job was submitted by
	 * the kernel driver (e.g. context creation), the job itself was
	 * released by the driver when a signal was received from the device.
	 */

	/*
	 * The memory of an internal queue belongs to the device, so there is
	 * nothing to free here.
	 */
	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	kfree(q->shadow_queue);

	if (q->queue_type == QUEUE_TYPE_CPU)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);
}

int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		q->collective_mode = asic->hw_queues_props[i].collective_mode;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}

void hl_hw_queues_destroy(struct hl_device *hdev)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}

void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
		if ((!q->valid) ||
			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
			continue;
		q->pi = 0;
		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
	}
}