#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_MMU_DEBUG	0
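
/* granularity of the DRAM physical page pool when the device DRAM page size
 * is not a power of 2 (see hl_vm_init())
 */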
#define DRAM_POOL_PAGE_SIZE SZ_8M
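
/*
 * alloc_device_memory() - allocate device (DRAM) memory.
 * @ctx: current context.
 * @args: ioctl input, holding the requested size and flags.
 * @ret_handle: handle of the new physical-pages-pack on success.
 *
 * Allocate the needed DRAM pages from the device page pool, either as one
 * contiguous block or page by page, and publish the pack in the handles IDR.
 */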
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
				u32 *ret_handle)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u64 paddr = 0, total_size, num_pgs, i;
	u32 num_curr_pgs, page_size;
	int handle, rc;
	bool contiguous;

	num_curr_pgs = 0;
	page_size = hdev->asic_prop.dram_page_size;
	num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
	total_size = num_pgs * page_size;

	if (!total_size) {
		dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
		return -EINVAL;
	}

	contiguous = args->flags & HL_MEM_CONTIGUOUS;

	if (contiguous) {
		paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
		if (!paddr) {
			dev_err(hdev->dev,
				"failed to allocate %llu contiguous pages with total size of %llu\n",
				num_pgs, total_size);
			return -ENOMEM;
		}
	}

	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
	if (!phys_pg_pack) {
		rc = -ENOMEM;
		goto pages_pack_err;
	}

	phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
	phys_pg_pack->asid = ctx->asid;
	phys_pg_pack->npages = num_pgs;
	phys_pg_pack->page_size = page_size;
	phys_pg_pack->total_size = total_size;
	phys_pg_pack->flags = args->flags;
	phys_pg_pack->contiguous = contiguous;

	phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
		rc = -ENOMEM;
		goto pages_arr_err;
	}

	if (phys_pg_pack->contiguous) {
		for (i = 0 ; i < num_pgs ; i++)
			phys_pg_pack->pages[i] = paddr + i * page_size;
	} else {
		for (i = 0 ; i < num_pgs ; i++) {
			phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
							vm->dram_pg_pool,
							page_size);
			if (!phys_pg_pack->pages[i]) {
				dev_err(hdev->dev,
					"Failed to allocate device memory (out of memory)\n");
				rc = -ENOMEM;
				goto page_err;
			}

			num_curr_pgs++;
		}
	}

	spin_lock(&vm->idr_lock);
	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
				GFP_KERNEL);
	spin_unlock(&vm->idr_lock);

	if (handle < 0) {
		dev_err(hdev->dev, "Failed to get handle for page\n");
		rc = -EFAULT;
		goto idr_err;
	}

	for (i = 0 ; i < num_pgs ; i++)
		kref_get(&vm->dram_pg_pool_refcount);

	phys_pg_pack->handle = handle;

	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);

	*ret_handle = handle;

	return 0;

idr_err:
page_err:
	if (!phys_pg_pack->contiguous)
		for (i = 0 ; i < num_curr_pgs ; i++)
			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
					page_size);

	kvfree(phys_pg_pack->pages);
pages_arr_err:
	kfree(phys_pg_pack);
pages_pack_err:
	if (contiguous)
		gen_pool_free(vm->dram_pg_pool, paddr, total_size);

	return rc;
}
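
/*
 * dma_map_host_va() - pin a host memory region and DMA-map it.
 * @hdev: habanalabs device structure.
 * @addr: host virtual address of the region.
 * @size: size of the region.
 * @p_userptr: returned pinned and DMA-mapped userptr object.
 */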
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
				struct hl_userptr **p_userptr)
{
	struct hl_userptr *userptr;
	int rc;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr) {
		rc = -ENOMEM;
		goto userptr_err;
	}

	rc = hl_pin_host_memory(hdev, addr, size, userptr);
	if (rc) {
		dev_err(hdev->dev, "Failed to pin host memory\n");
		goto pin_err;
	}

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
				userptr->sgt->nents, DMA_BIDIRECTIONAL);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto dma_map_err;
	}

	userptr->dma_mapped = true;
	userptr->dir = DMA_BIDIRECTIONAL;
	userptr->vm_type = VM_TYPE_USERPTR;

	*p_userptr = userptr;

	return 0;

dma_map_err:
	hl_unpin_host_memory(hdev, userptr);
pin_err:
	kfree(userptr);
userptr_err:

	return rc;
}
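
/*
 * dma_unmap_host_va() - undo dma_map_host_va(): unpin the host memory and
 * free the userptr object.
 */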
static void dma_unmap_host_va(struct hl_device *hdev,
				struct hl_userptr *userptr)
{
	hl_unpin_host_memory(hdev, userptr);
	kfree(userptr);
}
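
/*
 * dram_pg_pool_do_release() - kref release callback of the DRAM page pool.
 * Destroys the handles IDR and the gen_pool once the last reference is gone.
 */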
static void dram_pg_pool_do_release(struct kref *ref)
{
	struct hl_vm *vm = container_of(ref, struct hl_vm,
						dram_pg_pool_refcount);

	idr_destroy(&vm->phys_pg_pack_handles);
	gen_pool_destroy(vm->dram_pg_pool);
}
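
/*
 * free_phys_pg_pack() - free a physical pages pack.
 * For device memory packs, optionally scrub the pages, return them to the
 * DRAM pool and drop the pool references. The pack structure itself is
 * always freed.
 */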
static int free_phys_pg_pack(struct hl_device *hdev,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_vm *vm = &hdev->vm;
	u64 i;
	int rc = 0;

	if (phys_pg_pack->created_from_userptr)
		goto end;

	if (phys_pg_pack->contiguous) {
		if (hdev->memory_scrub && !hdev->disabled) {
			rc = hdev->asic_funcs->scrub_device_mem(hdev,
					phys_pg_pack->pages[0],
					phys_pg_pack->total_size);
			if (rc)
				dev_err(hdev->dev,
					"Failed to scrub contiguous device memory\n");
		}

		gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
			phys_pg_pack->total_size);

		for (i = 0; i < phys_pg_pack->npages ; i++)
			kref_put(&vm->dram_pg_pool_refcount,
				dram_pg_pool_do_release);
	} else {
		for (i = 0 ; i < phys_pg_pack->npages ; i++) {
			if (hdev->memory_scrub && !hdev->disabled && rc == 0) {
				rc = hdev->asic_funcs->scrub_device_mem(
						hdev,
						phys_pg_pack->pages[i],
						phys_pg_pack->page_size);
				if (rc)
					dev_err(hdev->dev,
						"Failed to scrub device memory\n");
			}
			gen_pool_free(vm->dram_pg_pool,
				phys_pg_pack->pages[i],
				phys_pg_pack->page_size);
			kref_put(&vm->dram_pg_pool_refcount,
				dram_pg_pool_do_release);
		}
	}

	if (rc && !hdev->disabled)
		hl_device_reset(hdev, HL_RESET_HARD);

end:
	kvfree(phys_pg_pack->pages);
	kfree(phys_pg_pack);

	return rc;
}
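
/*
 * free_device_memory() - free device (DRAM) memory by handle.
 * Fails if the allocation behind the handle is still mapped.
 */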
static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u32 handle = args->free.handle;

	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (phys_pg_pack) {
		if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
			dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
				handle);
			spin_unlock(&vm->idr_lock);
			return -EINVAL;
		}

		idr_remove(&vm->phys_pg_pack_handles, handle);
		spin_unlock(&vm->idr_lock);

		atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
		atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);

		return free_phys_pg_pack(hdev, phys_pg_pack);
	} else {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev,
			"free device memory failed, no match for handle %u\n",
			handle);
		return -EINVAL;
	}

	return 0;
}
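
/*
 * clear_va_list_locked() - free all blocks of a va list.
 * Caller must hold the va_range lock.
 */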
static void clear_va_list_locked(struct hl_device *hdev,
		struct list_head *va_list)
{
	struct hl_vm_va_block *va_block, *tmp;

	list_for_each_entry_safe(va_block, tmp, va_list, node) {
		list_del(&va_block->node);
		kfree(va_block);
	}
}
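
/*
 * print_va_list_locked() - dump the blocks of a va list (only when
 * HL_MMU_DEBUG is set). Caller must hold the va_range lock.
 */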
static void print_va_list_locked(struct hl_device *hdev,
		struct list_head *va_list)
{
#if HL_MMU_DEBUG
	struct hl_vm_va_block *va_block;

	dev_dbg(hdev->dev, "print va list:\n");

	list_for_each_entry(va_block, va_list, node)
		dev_dbg(hdev->dev,
			"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
			va_block->start, va_block->end, va_block->size);
#endif
}
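
/*
 * merge_va_blocks_locked() - merge a va block with its neighbors if together
 * they form a contiguous range. Caller must hold the va_range lock.
 */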
static void merge_va_blocks_locked(struct hl_device *hdev,
		struct list_head *va_list, struct hl_vm_va_block *va_block)
{
	struct hl_vm_va_block *prev, *next;

	prev = list_prev_entry(va_block, node);
	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
		prev->end = va_block->end;
		prev->size = prev->end - prev->start;
		list_del(&va_block->node);
		kfree(va_block);
		va_block = prev;
	}

	next = list_next_entry(va_block, node);
	if (&next->node != va_list && va_block->end + 1 == next->start) {
		next->start = va_block->start;
		next->size = next->end - next->start;
		list_del(&va_block->node);
		kfree(va_block);
	}
}
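
/*
 * add_va_block_locked() - add a [start, end] range to a va list, keeping the
 * list sorted and merging with adjacent blocks. Caller must hold the
 * va_range lock.
 */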
static int add_va_block_locked(struct hl_device *hdev,
		struct list_head *va_list, u64 start, u64 end)
{
	struct hl_vm_va_block *va_block, *res = NULL;
	u64 size = end - start;

	print_va_list_locked(hdev, va_list);

	list_for_each_entry(va_block, va_list, node) {
		if (hl_mem_area_crosses_range(start, size, va_block->start,
				va_block->end)) {
			dev_err(hdev->dev,
				"block crossing ranges at start 0x%llx, end 0x%llx\n",
				va_block->start, va_block->end);
			return -EINVAL;
		}

		if (va_block->end < start)
			res = va_block;
	}

	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
	if (!va_block)
		return -ENOMEM;

	va_block->start = start;
	va_block->end = end;
	va_block->size = size;

	if (!res)
		list_add(&va_block->node, va_list);
	else
		list_add(&va_block->node, &res->node);

	merge_va_blocks_locked(hdev, va_list, va_block);

	print_va_list_locked(hdev, va_list);

	return 0;
}
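
/* add_va_block() - lock the va_range and add a [start, end] range to it */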
static inline int add_va_block(struct hl_device *hdev,
		struct hl_va_range *va_range, u64 start, u64 end)
{
	int rc;

	mutex_lock(&va_range->lock);
	rc = add_va_block_locked(hdev, &va_range->list, start, end);
	mutex_unlock(&va_range->lock);

	return rc;
}
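
/*
 * get_va_block() - reserve a virtual block of a given size and alignment.
 * @hdev: habanalabs device structure.
 * @va_range: va range from which to reserve.
 * @size: requested size in bytes.
 * @hint_addr: preferred address, ignored if not suitably aligned.
 * @va_block_align: required alignment of the returned address.
 *
 * Returns the start address of the reserved block, or 0 on failure.
 */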
static u64 get_va_block(struct hl_device *hdev,
				struct hl_va_range *va_range,
				u64 size, u64 hint_addr, u32 va_block_align)
{
	struct hl_vm_va_block *va_block, *new_va_block = NULL;
	u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
		align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
	bool add_prev = false;
	bool is_align_pow_2 = is_power_of_2(va_range->page_size);

	if (is_align_pow_2)
		align_mask = ~((u64)va_block_align - 1);
	else
		/* with a non-power-of-2 range we work in whole pages only */
		size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
						va_range->page_size;

	tmp_hint_addr = hint_addr;

	if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
			(!is_align_pow_2 &&
				do_div(tmp_hint_addr, va_range->page_size))) {
		dev_dbg(hdev->dev,
			"Hint address 0x%llx will be ignored because it is not aligned\n",
			hint_addr);
		hint_addr = 0;
	}

	mutex_lock(&va_range->lock);

	print_va_list_locked(hdev, &va_range->list);

	list_for_each_entry(va_block, &va_range->list, node) {
		/* calc the first possible aligned address in this block */
		valid_start = va_block->start;

		if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
			valid_start &= align_mask;
			valid_start += va_block_align;
			if (valid_start > va_block->end)
				continue;
		}

		valid_size = va_block->end - valid_start;
		if (valid_size < size)
			continue;

		/* keep the smallest block that still fits the request */
		if (!new_va_block || (valid_size < reserved_valid_size)) {
			new_va_block = va_block;
			reserved_valid_start = valid_start;
			reserved_valid_size = valid_size;
		}

		if (hint_addr && hint_addr >= valid_start &&
				(hint_addr + size) <= va_block->end) {
			new_va_block = va_block;
			reserved_valid_start = hint_addr;
			reserved_valid_size = valid_size;
			break;
		}
	}

	if (!new_va_block) {
		dev_err(hdev->dev, "no available va block for size %llu\n",
								size);
		goto out;
	}

	/*
	 * if the reservation starts in the middle of the chosen block, the
	 * leftover range before it is returned to the list as a new block
	 */
	if (reserved_valid_start > new_va_block->start) {
		prev_start = new_va_block->start;
		prev_end = reserved_valid_start - 1;

		new_va_block->start = reserved_valid_start;
		new_va_block->size = reserved_valid_size;

		add_prev = true;
	}

	if (new_va_block->size > size) {
		new_va_block->start += size;
		new_va_block->size = new_va_block->end - new_va_block->start;
	} else {
		list_del(&new_va_block->node);
		kfree(new_va_block);
	}

	if (add_prev)
		add_va_block_locked(hdev, &va_range->list, prev_start,
				prev_end);

	print_va_list_locked(hdev, &va_range->list);
out:
	mutex_unlock(&va_range->lock);

	return reserved_valid_start;
}
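
/*
 * hl_reserve_va_block() - reserve a virtual block of a given size from the
 * context's va range of the given type.
 */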
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		enum hl_va_range_type type, u32 size, u32 alignment)
{
	return get_va_block(hdev, ctx->va_range[type], size, 0,
			max(alignment, ctx->va_range[type]->page_size));
}
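
/*
 * hl_get_va_range_type() - find which of the context's va ranges contains
 * the given address range.
 */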
static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
			enum hl_va_range_type *type)
{
	int i;

	for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) {
		if (hl_mem_area_inside_range(address, size,
				ctx->va_range[i]->start_addr,
				ctx->va_range[i]->end_addr)) {
			*type = i;
			return 0;
		}
	}

	return -EINVAL;
}
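
/*
 * hl_unreserve_va_block() - return a previously reserved virtual block to
 * its va range.
 */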
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		u64 start_addr, u64 size)
{
	enum hl_va_range_type type;
	int rc;

	rc = hl_get_va_range_type(ctx, start_addr, size, &type);
	if (rc) {
		dev_err(hdev->dev,
			"cannot find va_range for va %#llx size %llu",
			start_addr, size);
		return rc;
	}

	rc = add_va_block(hdev, ctx->va_range[type], start_addr,
			start_addr + size - 1);
	if (rc)
		dev_warn(hdev->dev,
			"add va block failed for vaddr: 0x%llx\n", start_addr);

	return rc;
}
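
/*
 * get_sg_info() - return the DMA address of an SG entry and the number of
 * PAGE_SIZE pages it covers.
 */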
static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
{
	*dma_addr = sg_dma_address(sg);

	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
}
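
/*
 * init_phys_pg_pack_from_userptr() - build a physical pages pack from the
 * DMA-mapped SG list of a pinned userptr. Device huge pages are used when
 * all SG entries are suitably sized and aligned.
 */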
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
				struct hl_userptr *userptr,
				struct hl_vm_phys_pg_pack **pphys_pg_pack)
{
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	struct scatterlist *sg;
	dma_addr_t dma_addr;
	u64 page_mask, total_npages;
	u32 npages, page_size = PAGE_SIZE,
		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
	bool first = true, is_huge_page_opt = true;
	int rc, i, j;
	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);

	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
	if (!phys_pg_pack)
		return -ENOMEM;

	phys_pg_pack->vm_type = userptr->vm_type;
	phys_pg_pack->created_from_userptr = true;
	phys_pg_pack->asid = ctx->asid;
	atomic_set(&phys_pg_pack->mapping_cnt, 1);

	/*
	 * the huge page size can be used only if every SG entry starts on a
	 * huge page boundary and contains a whole number of huge pages
	 */
	total_npages = 0;
	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
		npages = get_sg_info(sg, &dma_addr);

		total_npages += npages;

		if ((npages % pgs_in_huge_page) ||
					(dma_addr & (huge_page_size - 1)))
			is_huge_page_opt = false;
	}

	if (is_huge_page_opt) {
		page_size = huge_page_size;
		do_div(total_npages, pgs_in_huge_page);
	}

	page_mask = ~(((u64) page_size) - 1);

	phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
						GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
		rc = -ENOMEM;
		goto page_pack_arr_mem_err;
	}

	phys_pg_pack->npages = total_npages;
	phys_pg_pack->page_size = page_size;
	phys_pg_pack->total_size = total_npages * page_size;

	j = 0;
	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
		npages = get_sg_info(sg, &dma_addr);

		/* align down to the page size and save the offset */
		if (first) {
			first = false;
			phys_pg_pack->offset = dma_addr & (page_size - 1);
			dma_addr &= page_mask;
		}

		while (npages) {
			phys_pg_pack->pages[j++] = dma_addr;
			dma_addr += page_size;

			if (is_huge_page_opt)
				npages -= pgs_in_huge_page;
			else
				npages--;
		}
	}

	*pphys_pg_pack = phys_pg_pack;

	return 0;

page_pack_arr_mem_err:
	kfree(phys_pg_pack);

	return rc;
}
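
/*
 * map_phys_pg_pack() - map a physical pages pack to the given device virtual
 * address, page by page. On failure, already-mapped pages are unmapped.
 */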
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_device *hdev = ctx->hdev;
	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
	u32 page_size = phys_pg_pack->page_size;
	int rc = 0;
	bool is_host_addr;

	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
		paddr = phys_pg_pack->pages[i];

		rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
				(i + 1) == phys_pg_pack->npages);
		if (rc) {
			dev_err(hdev->dev,
				"map failed for handle %u, npages: %llu, mapped: %llu",
				phys_pg_pack->handle, phys_pg_pack->npages,
				mapped_pg_cnt);
			goto err;
		}

		mapped_pg_cnt++;
		next_vaddr += page_size;
	}

	return 0;

err:
	is_host_addr = !hl_is_dram_va(hdev, vaddr);

	next_vaddr = vaddr;
	for (i = 0 ; i < mapped_pg_cnt ; i++) {
		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
					(i + 1) == mapped_pg_cnt))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
					phys_pg_pack->handle, next_vaddr,
					phys_pg_pack->pages[i], page_size);

		next_vaddr += page_size;

		/*
		 * long unmap flows (simulation platform or large host
		 * mappings) can hog the CPU, so sleep every once in a while
		 * to avoid soft lockups
		 */
		if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
			usleep_range(50, 200);
	}

	return rc;
}
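
/*
 * unmap_phys_pg_pack() - unmap a physical pages pack from the given device
 * virtual address.
 */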
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_device *hdev = ctx->hdev;
	u64 next_vaddr, i;
	bool is_host_addr;
	u32 page_size;

	is_host_addr = !hl_is_dram_va(hdev, vaddr);
	page_size = phys_pg_pack->page_size;
	next_vaddr = vaddr;

	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
				(i + 1) == phys_pg_pack->npages))
			dev_warn_ratelimited(hdev->dev,
			"unmap failed for vaddr: 0x%llx\n", next_vaddr);

		/*
		 * long unmap flows (simulation platform or large host
		 * mappings) can hog the CPU, so sleep every once in a while
		 * to avoid soft lockups
		 */
		if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
			usleep_range(50, 200);
	}
}
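
/*
 * get_paddr_from_handle() - return the first physical address of the pack
 * behind the given handle (used when the device MMU is disabled).
 */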
static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
					u64 *paddr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u32 handle;

	handle = lower_32_bits(args->map_device.handle);
	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (!phys_pg_pack) {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev, "no match for handle %u\n", handle);
		return -EINVAL;
	}

	*paddr = phys_pg_pack->pages[0];

	spin_unlock(&vm->idr_lock);

	return 0;
}
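
/*
 * map_device_va() - map host or device memory into the device virtual space.
 * For host memory, the memory is pinned and DMA-mapped first; for device
 * memory, the pack behind the given handle is used. The resulting device
 * virtual address is returned in @device_addr and recorded in the context's
 * memory hash.
 */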
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
		u64 *device_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	struct hl_userptr *userptr = NULL;
	struct hl_vm_hash_node *hnode;
	struct hl_va_range *va_range;
	enum vm_type_t *vm_type;
	u64 ret_vaddr, hint_addr;
	u32 handle = 0, va_block_align;
	int rc;
	bool is_userptr = args->flags & HL_MEM_USERPTR;

	/* zero the output so it is well defined on failure */
	*device_addr = 0;

	if (is_userptr) {
		u64 addr = args->map_host.host_virt_addr,
			size = args->map_host.mem_size;
		u32 page_size = hdev->asic_prop.pmmu.page_size,
			huge_page_size = hdev->asic_prop.pmmu_huge.page_size;

		rc = dma_map_host_va(hdev, addr, size, &userptr);
		if (rc) {
			dev_err(hdev->dev, "failed to get userptr from va\n");
			return rc;
		}

		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				&phys_pg_pack);
		if (rc) {
			dev_err(hdev->dev,
				"unable to init page pack for vaddr 0x%llx\n",
				addr);
			goto init_page_pack_err;
		}

		vm_type = (enum vm_type_t *) userptr;
		hint_addr = args->map_host.hint_addr;
		handle = phys_pg_pack->handle;

		/* choose the va range and alignment by the mapping page size */
		if (phys_pg_pack->page_size == page_size) {
			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];

			/*
			 * align to huge page size only if the host address
			 * itself is huge-page aligned
			 */
			if (addr & (huge_page_size - 1))
				va_block_align = page_size;
			else
				va_block_align = huge_page_size;
		} else {
			/* huge page mappings come from the dedicated range */
			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
			va_block_align = huge_page_size;
		}
	} else {
		handle = lower_32_bits(args->map_device.handle);

		spin_lock(&vm->idr_lock);
		phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
		if (!phys_pg_pack) {
			spin_unlock(&vm->idr_lock);
			dev_err(hdev->dev,
				"no match for handle %u\n", handle);
			return -EINVAL;
		}

		/* increment under the idr lock so the handle can't be freed */
		atomic_inc(&phys_pg_pack->mapping_cnt);

		spin_unlock(&vm->idr_lock);

		vm_type = (enum vm_type_t *) phys_pg_pack;

		hint_addr = args->map_device.hint_addr;

		/* DRAM mappings are aligned to the DRAM MMU page size */
		va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
		va_block_align = hdev->asic_prop.dmmu.page_size;
	}

	/*
	 * device memory of another context can be mapped only if it was
	 * allocated with the SHARED flag
	 */
	if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
			phys_pg_pack->asid != ctx->asid) {
		dev_err(hdev->dev,
			"Failed to map memory, handle %u is not shared\n",
			handle);
		rc = -EPERM;
		goto shared_err;
	}

	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
	if (!hnode) {
		rc = -ENOMEM;
		goto hnode_err;
	}

	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
					hint_addr, va_block_align);
	if (!ret_vaddr) {
		dev_err(hdev->dev, "no available va block for handle %u\n",
				handle);
		rc = -ENOMEM;
		goto va_block_err;
	}

	mutex_lock(&ctx->mmu_lock);

	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
	if (rc) {
		mutex_unlock(&ctx->mmu_lock);
		dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
				handle);
		goto map_err;
	}

	rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
		*vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);

	mutex_unlock(&ctx->mmu_lock);

	if (rc) {
		dev_err(hdev->dev,
			"mapping handle %u failed due to MMU cache invalidation\n",
			handle);
		goto map_err;
	}

	ret_vaddr += phys_pg_pack->offset;

	hnode->ptr = vm_type;
	hnode->vaddr = ret_vaddr;

	mutex_lock(&ctx->mem_hash_lock);
	hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
	mutex_unlock(&ctx->mem_hash_lock);

	*device_addr = ret_vaddr;

	if (is_userptr)
		rc = free_phys_pg_pack(hdev, phys_pg_pack);

	return rc;

map_err:
	if (add_va_block(hdev, va_range, ret_vaddr,
				ret_vaddr + phys_pg_pack->total_size - 1))
		dev_warn(hdev->dev,
			"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
				handle, ret_vaddr);

va_block_err:
	kfree(hnode);
hnode_err:
shared_err:
	atomic_dec(&phys_pg_pack->mapping_cnt);
	if (is_userptr)
		free_phys_pg_pack(hdev, phys_pg_pack);
init_page_pack_err:
	if (is_userptr)
		dma_unmap_host_va(hdev, userptr);

	return rc;
}
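
/*
 * unmap_device_va() - unmap a device virtual address that was mapped by
 * map_device_va(). When called as part of context teardown (ctx_free), MMU
 * cache invalidation and va list updates are skipped.
 */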
static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
				bool ctx_free)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
	struct hl_vm_hash_node *hnode = NULL;
	struct hl_userptr *userptr = NULL;
	struct hl_va_range *va_range;
	u64 vaddr = args->unmap.device_virt_addr;
	enum vm_type_t *vm_type;
	bool is_userptr;
	int rc = 0;

	/*
	 * find and remove the mapping under the hash lock so a concurrent
	 * unmap of the same address fails cleanly
	 */
	mutex_lock(&ctx->mem_hash_lock);
	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
		if (vaddr == hnode->vaddr)
			break;

	if (!hnode) {
		mutex_unlock(&ctx->mem_hash_lock);
		dev_err(hdev->dev,
			"unmap failed, no mem hnode for vaddr 0x%llx\n",
			vaddr);
		return -EINVAL;
	}

	hash_del(&hnode->node);
	mutex_unlock(&ctx->mem_hash_lock);

	vm_type = hnode->ptr;

	if (*vm_type == VM_TYPE_USERPTR) {
		is_userptr = true;
		userptr = hnode->ptr;
		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
							&phys_pg_pack);
		if (rc) {
			dev_err(hdev->dev,
				"unable to init page pack for vaddr 0x%llx\n",
				vaddr);
			goto vm_type_err;
		}

		if (phys_pg_pack->page_size ==
					hdev->asic_prop.pmmu.page_size)
			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
		else
			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
		is_userptr = false;
		va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
		phys_pg_pack = hnode->ptr;
	} else {
		dev_warn(hdev->dev,
			"unmap failed, unknown vm desc for vaddr 0x%llx\n",
				vaddr);
		rc = -EFAULT;
		goto vm_type_err;
	}

	if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
		dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
		rc = -EINVAL;
		goto mapping_cnt_err;
	}

	if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
		vaddr = prop->dram_base_address +
			DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
						phys_pg_pack->page_size) *
							phys_pg_pack->page_size;
	else
		vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);

	mutex_lock(&ctx->mmu_lock);

	unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);

	/*
	 * during context free this function is called in a loop to clean all
	 * the context mappings, so the cache invalidation is done once at the
	 * end of that loop instead of here
	 */
	if (!ctx_free)
		rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
				*vm_type, ctx->asid, vaddr,
				phys_pg_pack->total_size);

	mutex_unlock(&ctx->mmu_lock);

	/*
	 * if the context is being torn down there is no need to update the
	 * va free list, as it is about to be destroyed anyway
	 */
	if (!ctx_free) {
		int tmp_rc;

		if (rc)
			dev_err(hdev->dev,
				"unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
				vaddr);

		tmp_rc = add_va_block(hdev, va_range, vaddr,
					vaddr + phys_pg_pack->total_size - 1);
		if (tmp_rc) {
			dev_warn(hdev->dev,
					"add va block failed for vaddr: 0x%llx\n",
					vaddr);
			if (!rc)
				rc = tmp_rc;
		}
	}

	atomic_dec(&phys_pg_pack->mapping_cnt);
	kfree(hnode);

	if (is_userptr) {
		rc = free_phys_pg_pack(hdev, phys_pg_pack);
		dma_unmap_host_va(hdev, userptr);
	}

	return rc;

mapping_cnt_err:
	if (is_userptr)
		free_phys_pg_pack(hdev, phys_pg_pack);
vm_type_err:
	mutex_lock(&ctx->mem_hash_lock);
	hash_add(ctx->mem_hash, &hnode->node, vaddr);
	mutex_unlock(&ctx->mem_hash_lock);

	return rc;
}
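
/*
 * map_block() - translate a HW block address to an mmap'able handle and its
 * size. The block id is encoded in the page-offset part of the handle.
 */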
static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
			u32 *size)
{
	u32 block_id = 0;
	int rc;

	rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);

	*handle = block_id | HL_MMAP_TYPE_BLOCK;
	*handle <<= PAGE_SHIFT;

	return rc;
}

static void hw_block_vm_close(struct vm_area_struct *vma)
{
	struct hl_vm_hw_block_list_node *lnode =
		(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
	struct hl_ctx *ctx = lnode->ctx;

	mutex_lock(&ctx->hw_block_list_lock);
	list_del(&lnode->node);
	mutex_unlock(&ctx->hw_block_list_lock);
	hl_ctx_put(ctx);
	kfree(lnode);
	vma->vm_private_data = NULL;
}

static const struct vm_operations_struct hw_block_vm_ops = {
	.close = hw_block_vm_close
};
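
/*
 * hl_hw_block_mmap() - mmap a HW block to user space.
 * The block id is passed in vma->vm_pgoff. The mapping is tracked on the
 * context's HW block list and removed when the VMA is closed.
 */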
int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
	struct hl_vm_hw_block_list_node *lnode;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u32 block_id, block_size;
	int rc;

	/*
	 * the page offset holds the block id, so clear it before doing the
	 * actual mmap and restore it afterwards
	 */
	block_id = vma->vm_pgoff;
	vma->vm_pgoff = 0;

	/* the mapping size is the full size of the VMA */
	block_size = vma->vm_end - vma->vm_start;

	if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
		dev_err(hdev->dev,
			"user pointer is invalid - 0x%lx\n",
			vma->vm_start);

		return -EINVAL;
	}

	lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
	if (!lnode)
		return -ENOMEM;

	vma->vm_ops = &hw_block_vm_ops;
	vma->vm_private_data = lnode;

	hl_ctx_get(hdev, ctx);

	rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
	if (rc) {
		hl_ctx_put(ctx);
		kfree(lnode);
		return rc;
	}

	lnode->ctx = ctx;
	lnode->vaddr = vma->vm_start;
	lnode->size = block_size;
	lnode->id = block_id;

	mutex_lock(&ctx->hw_block_list_lock);
	list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
	mutex_unlock(&ctx->hw_block_list_lock);

	vma->vm_pgoff = block_id;

	return 0;
}
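
/* mem_ioctl_no_mmu() - memory ioctl handler for ASICs with the MMU disabled */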
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u64 block_handle, device_addr = 0;
	u32 handle = 0, block_size;
	int rc;

	switch (args->in.op) {
	case HL_MEM_OP_ALLOC:
		if (args->in.alloc.mem_size == 0) {
			dev_err(hdev->dev,
				"alloc size must be larger than 0\n");
			rc = -EINVAL;
			goto out;
		}

		/*
		 * force contiguous allocation: without an MMU the device uses
		 * physical addresses directly, so the memory can't be scattered
		 */
		args->in.flags |= HL_MEM_CONTIGUOUS;
		rc = alloc_device_memory(ctx, &args->in, &handle);

		memset(args, 0, sizeof(*args));
		args->out.handle = (__u64) handle;
		break;

	case HL_MEM_OP_FREE:
		rc = free_device_memory(ctx, &args->in);
		break;

	case HL_MEM_OP_MAP:
		if (args->in.flags & HL_MEM_USERPTR) {
			device_addr = args->in.map_host.host_virt_addr;
			rc = 0;
		} else {
			rc = get_paddr_from_handle(ctx, &args->in,
							&device_addr);
		}

		memset(args, 0, sizeof(*args));
		args->out.device_virt_addr = device_addr;
		break;

	case HL_MEM_OP_UNMAP:
		rc = 0;
		break;

	case HL_MEM_OP_MAP_BLOCK:
		rc = map_block(hdev, args->in.map_block.block_addr,
				&block_handle, &block_size);
		args->out.block_handle = block_handle;
		args->out.block_size = block_size;
		break;

	default:
		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
		rc = -ENOTTY;
		break;
	}

out:
	return rc;
}
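
/* hl_mem_ioctl() - top-level handler of the memory ioctl */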
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
	enum hl_device_status status;
	union hl_mem_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u64 block_handle, device_addr = 0;
	u32 handle = 0, block_size;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute MEMORY IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	if (!hdev->mmu_enable)
		return mem_ioctl_no_mmu(hpriv, args);

	switch (args->in.op) {
	case HL_MEM_OP_ALLOC:
		if (args->in.alloc.mem_size == 0) {
			dev_err(hdev->dev,
				"alloc size must be larger than 0\n");
			rc = -EINVAL;
			goto out;
		}

		/*
		 * if DRAM does not support virtual memory, the driver doesn't
		 * handle the allocation itself but still keeps track of the
		 * amount of DRAM the user reports as allocated
		 */
		if (!hdev->asic_prop.dram_supports_virtual_memory) {
			atomic64_add(args->in.alloc.mem_size,
					&ctx->dram_phys_mem);
			atomic64_add(args->in.alloc.mem_size,
					&hdev->dram_used_mem);

			dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
			rc = 0;

			memset(args, 0, sizeof(*args));
			args->out.handle = 0;
			goto out;
		}

		rc = alloc_device_memory(ctx, &args->in, &handle);

		memset(args, 0, sizeof(*args));
		args->out.handle = (__u64) handle;
		break;

	case HL_MEM_OP_FREE:
		/*
		 * if DRAM does not support virtual memory, only update the
		 * accounting of the DRAM the user reports as freed
		 */
		if (!hdev->asic_prop.dram_supports_virtual_memory) {
			atomic64_sub(args->in.alloc.mem_size,
					&ctx->dram_phys_mem);
			atomic64_sub(args->in.alloc.mem_size,
					&hdev->dram_used_mem);

			dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
			rc = 0;

			goto out;
		}

		rc = free_device_memory(ctx, &args->in);
		break;

	case HL_MEM_OP_MAP:
		rc = map_device_va(ctx, &args->in, &device_addr);

		memset(args, 0, sizeof(*args));
		args->out.device_virt_addr = device_addr;
		break;

	case HL_MEM_OP_UNMAP:
		rc = unmap_device_va(ctx, &args->in, false);
		break;

	case HL_MEM_OP_MAP_BLOCK:
		rc = map_block(hdev, args->in.map_block.block_addr,
				&block_handle, &block_size);
		args->out.block_handle = block_handle;
		args->out.block_size = block_size;
		break;

	default:
		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
		rc = -ENOTTY;
		break;
	}

out:
	return rc;
}
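
/* get_user_memory() - pin user pages and build an SG table for them */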
static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
				u32 npages, u64 start, u32 offset,
				struct hl_userptr *userptr)
{
	int rc;

	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
		return -EFAULT;
	}

	userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
					GFP_KERNEL);
	if (!userptr->pages)
		return -ENOMEM;

	rc = pin_user_pages_fast(start, npages,
				FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
				userptr->pages);

	if (rc != npages) {
		dev_err(hdev->dev,
			"Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n",
			rc, addr, size, npages);
		if (rc < 0)
			goto destroy_pages;
		npages = rc;
		rc = -EFAULT;
		goto put_pages;
	}
	userptr->npages = npages;

	rc = sg_alloc_table_from_pages(userptr->sgt,
				userptr->pages,
				npages, offset, size, GFP_KERNEL);
	if (rc < 0) {
		dev_err(hdev->dev, "failed to create SG table from pages\n");
		goto put_pages;
	}

	return 0;

put_pages:
	unpin_user_pages(userptr->pages, npages);
destroy_pages:
	kvfree(userptr->pages);
	return rc;
}
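
/*
 * hl_pin_host_memory() - pin a chunk of host memory.
 * @hdev: habanalabs device structure.
 * @addr: host virtual address of the memory area.
 * @size: size of the memory area.
 * @userptr: userptr structure to fill with the pinning information.
 */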
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
			struct hl_userptr *userptr)
{
	u64 start, end;
	u32 npages, offset;
	int rc;

	if (!size) {
		dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
		return -EINVAL;
	}

	/*
	 * reject the request if the combination of address and size causes an
	 * integer overflow
	 */
	if (((addr + size) < addr) ||
			PAGE_ALIGN(addr + size) < (addr + size)) {
		dev_err(hdev->dev,
			"user pointer 0x%llx + %llu causes integer overflow\n",
			addr, size);
		return -EINVAL;
	}

	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
	if (!userptr->sgt)
		return -ENOMEM;

	start = addr & PAGE_MASK;
	offset = addr & ~PAGE_MASK;
	end = PAGE_ALIGN(addr + size);
	npages = (end - start) >> PAGE_SHIFT;

	userptr->size = size;
	userptr->addr = addr;
	userptr->dma_mapped = false;
	INIT_LIST_HEAD(&userptr->job_node);

	rc = get_user_memory(hdev, addr, size, npages, start, offset,
				userptr);
	if (rc) {
		dev_err(hdev->dev,
			"failed to get user memory for address 0x%llx\n",
			addr);
		goto free_sgt;
	}

	hl_debugfs_add_userptr(hdev, userptr);

	return 0;

free_sgt:
	kfree(userptr->sgt);
	return rc;
}
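
/*
 * hl_unpin_host_memory() - unpin host memory that was pinned by
 * hl_pin_host_memory(), unmapping it first if it was DMA-mapped.
 */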
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{
	hl_debugfs_remove_userptr(hdev, userptr);

	if (userptr->dma_mapped)
		hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
							userptr->sgt->nents,
							userptr->dir);

	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
	kvfree(userptr->pages);

	list_del(&userptr->job_node);

	sg_free_table(userptr->sgt);
	kfree(userptr->sgt);
}
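
/* hl_userptr_delete_list() - unpin and free all userptr objects on a list */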
void hl_userptr_delete_list(struct hl_device *hdev,
				struct list_head *userptr_list)
{
	struct hl_userptr *userptr, *tmp;

	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
		hl_unpin_host_memory(hdev, userptr);
		kfree(userptr);
	}

	INIT_LIST_HEAD(userptr_list);
}
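
/*
 * hl_userptr_is_pinned() - check if the given host address range is already
 * pinned on the given list and return the matching userptr if so.
 */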
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				u32 size, struct list_head *userptr_list,
				struct hl_userptr **userptr)
{
	list_for_each_entry((*userptr), userptr_list, job_node) {
		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
			return true;
	}

	return false;
}
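
/*
 * va_range_init() - initialize a virtual addresses range with one big free
 * block spanning [start, end].
 */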
static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
			u64 start, u64 end, u32 page_size)
{
	int rc;

	INIT_LIST_HEAD(&va_range->list);

	/*
	 * align the boundaries to PAGE_SIZE; when the page size is not a
	 * power of 2 the range is used as given
	 */
	if (is_power_of_2(page_size)) {
		if (start & (PAGE_SIZE - 1)) {
			start &= PAGE_MASK;
			start += PAGE_SIZE;
		}

		if (end & (PAGE_SIZE - 1))
			end &= PAGE_MASK;
	}

	if (start >= end) {
		dev_err(hdev->dev, "too small vm range for va list\n");
		return -EFAULT;
	}

	rc = add_va_block(hdev, va_range, start, end);

	if (rc) {
		dev_err(hdev->dev, "Failed to init host va list\n");
		return rc;
	}

	va_range->start_addr = start;
	va_range->end_addr = end;
	va_range->page_size = page_size;

	return 0;
}
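
/* va_range_fini() - clear a va range's block list and free the range */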
static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{
	mutex_lock(&va_range->lock);
	clear_va_list_locked(hdev, &va_range->list);
	mutex_unlock(&va_range->lock);

	mutex_destroy(&va_range->lock);
	kfree(va_range);
}
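
/*
 * vm_ctx_init_with_ranges() - initialize the context's MMU and its host,
 * host-huge and DRAM virtual address ranges. When the ASIC has no dedicated
 * huge-page range, the host range is shared for both.
 */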
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
					u64 host_range_start,
					u64 host_range_end,
					u32 host_page_size,
					u64 host_huge_range_start,
					u64 host_huge_range_end,
					u32 host_huge_page_size,
					u64 dram_range_start,
					u64 dram_range_end,
					u32 dram_page_size)
{
	struct hl_device *hdev = ctx->hdev;
	int i, rc;

	for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
		ctx->va_range[i] =
			kzalloc(sizeof(struct hl_va_range), GFP_KERNEL);
		if (!ctx->va_range[i]) {
			rc = -ENOMEM;
			goto free_va_range;
		}
	}

	rc = hl_mmu_ctx_init(ctx);
	if (rc) {
		dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
		goto free_va_range;
	}

	mutex_init(&ctx->mem_hash_lock);
	hash_init(ctx->mem_hash);

	mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);

	rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST],
			host_range_start, host_range_end, host_page_size);
	if (rc) {
		dev_err(hdev->dev, "failed to init host vm range\n");
		goto mmu_ctx_fini;
	}

	if (hdev->pmmu_huge_range) {
		mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);

		rc = va_range_init(hdev,
			ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE],
			host_huge_range_start, host_huge_range_end,
			host_huge_page_size);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init host huge vm range\n");
			goto clear_host_va_range;
		}
	} else {
		kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
		ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] =
				ctx->va_range[HL_VA_RANGE_TYPE_HOST];
	}

	mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);

	rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM],
			dram_range_start, dram_range_end, dram_page_size);
	if (rc) {
		dev_err(hdev->dev, "failed to init dram vm range\n");
		goto clear_host_huge_va_range;
	}

	hl_debugfs_add_ctx_mem_hash(hdev, ctx);

	return 0;

clear_host_huge_va_range:
	mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);

	if (hdev->pmmu_huge_range) {
		mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
		clear_va_list_locked(hdev,
			&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list);
		mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
	}
clear_host_va_range:
	if (hdev->pmmu_huge_range)
		mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
	mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
	clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list);
	mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
mmu_ctx_fini:
	mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
	mutex_destroy(&ctx->mem_hash_lock);
	hl_mmu_ctx_fini(ctx);
free_va_range:
	for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++)
		kfree(ctx->va_range[i]);

	return rc;
}
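
/* hl_vm_ctx_init() - initialize virtual memory for a new context */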
int hl_vm_ctx_init(struct hl_ctx *ctx)
{
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	u64 host_range_start, host_range_end, host_huge_range_start,
		host_huge_range_end, dram_range_start, dram_range_end;
	u32 host_page_size, host_huge_page_size, dram_page_size;

	atomic64_set(&ctx->dram_phys_mem, 0);

	/*
	 * when the MMU is disabled there is nothing to set up here: host
	 * mappings return the given host address and DRAM mappings return
	 * the physical address behind the handle (see mem_ioctl_no_mmu())
	 */
	if (!ctx->hdev->mmu_enable)
		return 0;

	dram_range_start = prop->dmmu.start_addr;
	dram_range_end = prop->dmmu.end_addr;
	dram_page_size = prop->dram_page_size ?
				prop->dram_page_size : prop->dmmu.page_size;
	host_range_start = prop->pmmu.start_addr;
	host_range_end = prop->pmmu.end_addr;
	host_page_size = prop->pmmu.page_size;
	host_huge_range_start = prop->pmmu_huge.start_addr;
	host_huge_range_end = prop->pmmu_huge.end_addr;
	host_huge_page_size = prop->pmmu_huge.page_size;

	return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
			host_page_size, host_huge_range_start,
			host_huge_range_end, host_huge_page_size,
			dram_range_start, dram_range_end, dram_page_size);
}
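
/*
 * hl_vm_ctx_fini() - tear down the context's virtual memory: unmap leftover
 * mappings, free device allocations that are still alive and release the va
 * ranges.
 */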
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_list;
	struct hl_vm_hash_node *hnode;
	struct hlist_node *tmp_node;
	struct hl_mem_in args;
	int i;

	if (!hdev->mmu_enable)
		return;

	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);

	/*
	 * during a hard reset leftover mappings are expected, so don't print
	 * another side-effect error
	 */
	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
		dev_notice(hdev->dev,
			"user released device without removing its memory mappings\n");

	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
		dev_dbg(hdev->dev,
			"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
			hnode->vaddr, ctx->asid);
		args.unmap.device_virt_addr = hnode->vaddr;
		unmap_device_va(ctx, &args, true);
	}

	mutex_lock(&ctx->mmu_lock);

	/* invalidate the cache once after the whole unmapping loop */
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);

	mutex_unlock(&ctx->mmu_lock);

	spin_lock(&vm->idr_lock);
	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
		if (phys_pg_list->asid == ctx->asid) {
			dev_dbg(hdev->dev,
				"page list 0x%px of asid %d is still alive\n",
				phys_pg_list, ctx->asid);
			atomic64_sub(phys_pg_list->total_size,
					&hdev->dram_used_mem);
			free_phys_pg_pack(hdev, phys_pg_list);
			idr_remove(&vm->phys_pg_pack_handles, i);
		}
	spin_unlock(&vm->idr_lock);

	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);

	if (hdev->pmmu_huge_range)
		va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);

	mutex_destroy(&ctx->mem_hash_lock);
	hl_mmu_ctx_fini(ctx);

	/*
	 * when DRAM has no virtual memory support the usage counter is based
	 * on user reports, so once the user context is gone all DRAM is
	 * considered available again
	 */
	if (ctx->asid != HL_KERNEL_ASID_ID &&
			!hdev->asic_prop.dram_supports_virtual_memory)
		atomic64_set(&hdev->dram_used_mem, 0);
}
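
/*
 * hl_vm_init() - initialize the device virtual memory module: create the
 * DRAM page pool and the handles IDR.
 */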
int hl_vm_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm *vm = &hdev->vm;
	int rc;

	if (is_power_of_2(prop->dram_page_size))
		vm->dram_pg_pool =
			gen_pool_create(__ffs(prop->dram_page_size), -1);
	else
		vm->dram_pg_pool =
			gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);

	if (!vm->dram_pg_pool) {
		dev_err(hdev->dev, "Failed to create dram page pool\n");
		return -ENOMEM;
	}

	kref_init(&vm->dram_pg_pool_refcount);

	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
			prop->dram_end_address - prop->dram_user_base_address,
			-1);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to dram page pool %d\n", rc);
		goto pool_add_err;
	}

	spin_lock_init(&vm->idr_lock);
	idr_init(&vm->phys_pg_pack_handles);

	atomic64_set(&hdev->dram_used_mem, 0);

	vm->init_done = true;

	return 0;

pool_add_err:
	gen_pool_destroy(vm->dram_pg_pool);

	return rc;
}
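
/*
 * hl_vm_fini() - tear down the device virtual memory module and release the
 * DRAM page pool.
 */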
void hl_vm_fini(struct hl_device *hdev)
{
	struct hl_vm *vm = &hdev->vm;

	if (!vm->init_done)
		return;

	/*
	 * at this point all contexts should have been released, so this
	 * should be the last reference and the pool should be destroyed here
	 */
	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
		dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
				__func__);

	vm->init_done = false;
}
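
/* hl_hw_block_mem_init() - initialize the context's HW block mappings list */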
void hl_hw_block_mem_init(struct hl_ctx *ctx)
{
	mutex_init(&ctx->hw_block_list_lock);
	INIT_LIST_HEAD(&ctx->hw_block_mem_list);
}
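
/* hl_hw_block_mem_fini() - free leftover HW block mappings of the context */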
void hl_hw_block_mem_fini(struct hl_ctx *ctx)
{
	struct hl_vm_hw_block_list_node *lnode, *tmp;

	if (!list_empty(&ctx->hw_block_mem_list))
		dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");

	list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
		list_del(&lnode->node);
		kfree(lnode);
	}

	mutex_destroy(&ctx->hw_block_list_lock);
}