/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/compiler.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/timer.h>
#include <linux/pci.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0)
#include <linux/sched/signal.h>
#endif
/**
* This is needed for round_up()
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
#include <linux/math.h>
#endif
/**
* HAVE_UNLOCKED_IOCTL has been dropped in kernel version 5.9.
* There is a chance that the removal might be backported to earlier 5.x releases.
* So if HAVE_UNLOCKED_IOCTL is not defined in kernel v5, we define it.
* This also allows backward-compatibility with kernel < 2.6.11.
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) && !defined(HAVE_UNLOCKED_IOCTL)
#define HAVE_UNLOCKED_IOCTL 1
#endif
//-----------------------------------------------------------------------------
static const unsigned int GDRDRV_BF3_PCI_ROOT_DEV_VENDOR_ID = 0x15b3;
static const unsigned int GDRDRV_BF3_PCI_ROOT_DEV_DEVICE_ID[2] = {0xa2da, 0xa2db};
//-----------------------------------------------------------------------------
static int gdrdrv_major = 0;
static int gdrdrv_cpu_can_cache_gpu_mappings = 0;
static int gdrdrv_cpu_must_use_device_mapping = 0;
//-----------------------------------------------------------------------------
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,32)
/**
* This API is available after Linux kernel 2.6.32
*/
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26)
//
// The .tree_lock member variable was changed from type rwlock_t, to
// spinlock_t, on 25 July 2008, by mainline commit
// 19fd6231279be3c3bdd02ed99f9b0eb195978064.
//
rwlock_init(&mapping->tree_lock);
#else
spin_lock_init(&mapping->tree_lock);
#endif
spin_lock_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(6,3,0)
/**
* vm_flags_set() was introduced in Linux kernel 6.3; this fallback covers older kernels.
* See https://github.com/torvalds/linux/commit/bc292ab00f6c7a661a8a605c714e8a148f629ef6
*/
static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
vma->vm_flags |= flags;
}
#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
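/*
 * Clearing _PAGE_PCD and setting only _PAGE_PWT selects PAT index 1, which
 * the Linux kernel's standard PAT programming maps to write-combining
 * (assuming a PAT-capable CPU with the kernel's default PAT layout).
 */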
static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
{
pgprot_t new_prot = old_prot;
pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_PCD | _PAGE_PWT);
new_prot = __pgprot(pgprot_val(new_prot) | _PAGE_PWT);
return new_prot;
}
static inline pgprot_t pgprot_modify_device(pgprot_t old_prot)
{
// Device mapping should never be called on x86
BUG_ON(1);
return old_prot;
}
#define get_tsc_khz() cpu_khz // tsc_khz
static inline int gdr_pfn_is_ram(unsigned long pfn)
{
// page_is_ram is GPL-only. In any case, no x86 platform supports
// coherent GPU mappings, so we would not use this function anyway.
return 0;
}
#elif defined(CONFIG_PPC64)
#include <asm/reg.h>
static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
{
return pgprot_writecombine(old_prot);
}
static inline pgprot_t pgprot_modify_device(pgprot_t old_prot)
{
// Device mapping should never be called on PPC64
BUG_ON(1);
return old_prot;
}
#define get_tsc_khz() (get_cycles()/1000) // dirty hack
static inline int gdr_pfn_is_ram(unsigned long pfn)
{
// Catch platforms where GPU memory is non-coherent, e.g. POWER8, or POWER9
// with GPUs not attached via NVLink.
#ifdef GDRDRV_OPENSOURCE_NVIDIA
// page_is_ram is a GPL symbol. We can use it with the open flavor of NVIDIA driver.
return page_is_ram(pfn);
#else
// For the proprietary flavor, we approximate using the following algorithm.
unsigned long start = pfn << PAGE_SHIFT;
unsigned long mask_47bits = (1UL<<47)-1;
return gdrdrv_cpu_can_cache_gpu_mappings && (0 == (start & ~mask_47bits));
#endif
}
#elif defined(CONFIG_ARM64)
static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
{
return pgprot_writecombine(old_prot);
}
static inline pgprot_t pgprot_modify_device(pgprot_t old_prot)
{
return pgprot_device(old_prot);
}
static inline int gdr_pfn_is_ram(unsigned long pfn)
{
#ifdef GDRDRV_OPENSOURCE_NVIDIA
// page_is_ram is a GPL symbol. We can use it with the open flavor.
return page_is_ram(pfn);
#else
// For the proprietary flavor of NVIDIA driver, we use WC mapping.
return 0;
#endif
}
#else
#error "X86_64/32 or PPC64 or ARM64 is required"
#endif
#include "gdrdrv.h"
#include "nv-p2p.h"
//-----------------------------------------------------------------------------
#ifndef NVIDIA_P2P_MAJOR_VERSION_MASK
#define NVIDIA_P2P_MAJOR_VERSION_MASK 0xffff0000
#endif
#ifndef NVIDIA_P2P_MINOR_VERSION_MASK
#define NVIDIA_P2P_MINOR_VERSION_MASK 0x0000ffff
#endif
#ifndef NVIDIA_P2P_MAJOR_VERSION
#define NVIDIA_P2P_MAJOR_VERSION(v) \
(((v) & NVIDIA_P2P_MAJOR_VERSION_MASK) >> 16)
#endif
#ifndef NVIDIA_P2P_MINOR_VERSION
#define NVIDIA_P2P_MINOR_VERSION(v) \
(((v) & NVIDIA_P2P_MINOR_VERSION_MASK))
#endif
#ifndef NVIDIA_P2P_MAJOR_VERSION_MATCHES
#define NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) \
(NVIDIA_P2P_MAJOR_VERSION((p)->version) == NVIDIA_P2P_MAJOR_VERSION(v))
#endif
#ifndef NVIDIA_P2P_VERSION_COMPATIBLE
#define NVIDIA_P2P_VERSION_COMPATIBLE(p, v) \
(NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) && \
(NVIDIA_P2P_MINOR_VERSION((p)->version) >= NVIDIA_P2P_MINOR_VERSION(v)))
#endif
#ifndef NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE
#define NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(p) \
NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_PAGE_TABLE_VERSION)
#endif
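/*
 * Example: page_table->version == 0x00010002 encodes major 1, minor 2; it is
 * compatible with a requested NVIDIA_P2P_PAGE_TABLE_VERSION that has the same
 * major version and a minor version of at most 2.
 */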
#ifdef GDRDRV_OPENSOURCE_NVIDIA
#define GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING "opensource"
#else
#define GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING "proprietary"
#endif
//-----------------------------------------------------------------------------
#define DEVNAME "gdrdrv"
#define gdr_msg(KRNLVL, FMT, ARGS...) printk(KRNLVL DEVNAME ":%s:" FMT, __func__, ## ARGS)
//#define gdr_msg(KRNLVL, FMT, ARGS...) printk_ratelimited(KRNLVL DEVNAME ":" FMT, ## ARGS)
static int dbg_enabled = 0;
#define gdr_dbg(FMT, ARGS...) \
do { \
if (dbg_enabled) \
gdr_msg(KERN_DEBUG, FMT, ## ARGS); \
} while(0)
static int info_enabled = 0;
#define gdr_info(FMT, ARGS...) \
do { \
if (info_enabled) \
gdr_msg(KERN_INFO, FMT, ## ARGS); \
} while(0)
#define gdr_err(FMT, ARGS...) \
gdr_msg(KERN_DEBUG, FMT, ## ARGS)
static int use_persistent_mapping = 0;
//-----------------------------------------------------------------------------
MODULE_AUTHOR("drossetti@nvidia.com");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_DESCRIPTION("GDRCopy kernel-mode driver built for " GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING " NVIDIA driver");
MODULE_VERSION(GDRDRV_VERSION_STRING);
module_param(dbg_enabled, int, 0000);
MODULE_PARM_DESC(dbg_enabled, "enable debug tracing");
module_param(info_enabled, int, 0000);
MODULE_PARM_DESC(info_enabled, "enable info tracing");
module_param(use_persistent_mapping, int, 0000);
MODULE_PARM_DESC(use_persistent_mapping, "use persistent mapping instead of traditional (non-persistent) mapping");
//-----------------------------------------------------------------------------
#define GPU_PAGE_SHIFT 16
#define GPU_PAGE_SIZE ((u64)1 << GPU_PAGE_SHIFT)
#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1)
#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET)
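/*
 * Worked example with a hypothetical address: for addr = 0x7f0012345678,
 *   addr & GPU_PAGE_MASK   == 0x7f0012340000  (64KB-aligned start)
 *   addr & GPU_PAGE_OFFSET == 0x5678          (offset within the GPU page)
 * so pin requests below get widened to 64KB GPU-page granularity.
 */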
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
// compatibility with old Linux kernels
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif
#ifndef READ_ONCE
#define READ_ONCE(x) ACCESS_ONCE(x)
#endif
//-----------------------------------------------------------------------------
struct gdr_mr {
struct list_head node;
gdr_hnd_t handle;
u64 offset;
u64 p2p_token;
u32 va_space;
u32 page_size;
u64 va;
u64 mapped_size;
gdr_mr_type_t cpu_mapping_type;
nvidia_p2p_page_table_t *page_table;
int cb_flag;
cycles_t tm_cycles;
unsigned int tsc_khz;
struct vm_area_struct *vma;
struct address_space *mapping;
struct rw_semaphore sem;
};
typedef struct gdr_mr gdr_mr_t;
/**
* Prerequisite:
* - mr must be protected by down_read(mr->sem) or stronger.
*/
static int gdr_mr_is_mapped(gdr_mr_t *mr)
{
return mr->cpu_mapping_type != GDR_MR_NONE;
}
static inline void gdrdrv_zap_vma(struct address_space *mapping, struct vm_area_struct *vma)
{
// unmap_mapping_range is mainly used for files, so the address is relative
// to the file offset. We use vma->vm_pgoff here to unmap this entire range
// but not the other mapped ranges.
unmap_mapping_range(mapping, vma->vm_pgoff << PAGE_SHIFT, vma->vm_end - vma->vm_start, 0);
}
/**
* Prerequisite:
* - mr must be protected by down_write(mr->sem).
*/
static void gdr_mr_destroy_all_mappings(gdr_mr_t *mr)
{
// there is a single mapping at the moment
if (mr->vma)
gdrdrv_zap_vma(mr->mapping, mr->vma);
mr->cpu_mapping_type = GDR_MR_NONE;
}
//-----------------------------------------------------------------------------
struct gdr_info {
// simple low-performance linked-list implementation
struct list_head mr_list;
struct mutex lock;
// Pointer to the pid struct of the creator task group.
// We do not use numerical pid here to avoid issues from pid reuse.
struct pid *tgid;
// Address space unique to this opened file. We need to create a new one
// because filp->f_mapping usually points to inode->i_mapping.
struct address_space mapping;
// The handle number and the mmap offset are equivalent. However, the mmap
// offset is used by the Linux kernel when doing m(un)map, so the ranges
// must not overlap. We lay out consecutive ranges next to each other to
// avoid this issue.
gdr_hnd_t next_handle;
int next_handle_overflow;
};
typedef struct gdr_info gdr_info_t;
//-----------------------------------------------------------------------------
static int gdrdrv_check_same_process(gdr_info_t *info, struct task_struct *tsk)
{
int same_proc;
BUG_ON(0 == info);
BUG_ON(0 == tsk);
same_proc = (info->tgid == task_tgid(tsk)); // both tasks belong to the same task group
if (!same_proc) {
gdr_dbg("check failed, info:{tgid=%p} this tsk={tgid=%p}\n",
info->tgid, task_tgid(tsk));
}
return same_proc;
}
//-----------------------------------------------------------------------------
static inline int gdr_support_persistent_mapping(void)
{
#if defined(NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API)
return 1;
#elif defined(NVIDIA_P2P_CAP_PERSISTENT_PAGES)
return !!(nvidia_p2p_cap_persistent_pages);
#else
return 0;
#endif
}
static inline int gdr_use_persistent_mapping(void)
{
return use_persistent_mapping && gdr_support_persistent_mapping();
}
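/*
 * Newer GPU drivers advertise the persistent API via the
 * NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API macro, while some earlier ones
 * export the nvidia_p2p_cap_persistent_pages flag instead. Persistent
 * mapping is used only when the driver supports it and the
 * use_persistent_mapping module parameter is set.
 */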
//-----------------------------------------------------------------------------
static int gdrdrv_open(struct inode *inode, struct file *filp)
{
unsigned int minor = MINOR(inode->i_rdev);
int ret = 0;
gdr_info_t *info = NULL;
gdr_dbg("minor=%d filep=0x%px\n", minor, filp);
if(minor >= 1) {
gdr_err("device minor number too big!\n");
ret = -ENXIO;
goto out;
}
info = kzalloc(sizeof(gdr_info_t), GFP_KERNEL);
if (!info) {
gdr_err("can't alloc kernel memory\n");
ret = -ENOMEM;
goto out;
}
INIT_LIST_HEAD(&info->mr_list);
mutex_init(&info->lock);
// The GPU driver does not support sharing GPU allocations at fork time.
// Hence we track the task group owning the driver fd here and prevent
// other processes from using it.
info->tgid = task_tgid(current);
address_space_init_once(&info->mapping);
info->mapping.host = inode;
info->mapping.a_ops = inode->i_mapping->a_ops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0)
info->mapping.backing_dev_info = inode->i_mapping->backing_dev_info;
#endif
filp->f_mapping = &info->mapping;
filp->private_data = info;
out:
return ret;
}
//-----------------------------------------------------------------------------
/**
* Clean up and free all resources (e.g., page_table) associated with this mr.
*
* Prerequisites:
* - mr->sem must be under down_write before calling this function.
* - There is no mapping associated with this mr.
*
* After this function returns, mr is freed and cannot be accessed anymore.
*
*/
static void gdr_free_mr_unlocked(gdr_mr_t *mr)
{
int status = 0;
nvidia_p2p_page_table_t *page_table = NULL;
BUG_ON(!mr);
BUG_ON(gdr_mr_is_mapped(mr));
page_table = mr->page_table;
if (page_table) {
gdr_info("invoking nvidia_p2p_put_pages(va=0x%llx p2p_tok=%llx va_tok=%x)\n",
mr->va, mr->p2p_token, mr->va_space);
// We reach here before gdrdrv_get_pages_free_callback. However, it might
// be waiting on the semaphore. Release the semaphore to let it make
// progress.
up_write(&mr->sem);
// In case gdrdrv_get_pages_free_callback is in flight, nvidia_p2p_put_pages will block.
#ifdef NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
if (gdr_use_persistent_mapping()) {
status = nvidia_p2p_put_pages_persistent(mr->va, page_table, 0);
if (status) {
gdr_err("nvidia_p2p_put_pages_persistent error %d\n", status);
}
} else {
status = nvidia_p2p_put_pages(mr->p2p_token, mr->va_space, mr->va, page_table);
if (status) {
gdr_err("nvidia_p2p_put_pages error %d, async callback may have been fired\n", status);
}
}
#else
status = nvidia_p2p_put_pages(mr->p2p_token, mr->va_space, mr->va, page_table);
if (status) {
gdr_err("nvidia_p2p_put_pages error %d, async callback may have been fired\n", status);
}
#endif
} else {
gdr_dbg("invoking unpin_buffer while callback has already been fired\n");
// From this point, no other code paths will access this mr.
// We release the semaphore, then clear and free the mr.
up_write(&mr->sem);
}
memset(mr, 0, sizeof(*mr));
kfree(mr);
}
//-----------------------------------------------------------------------------
static int gdrdrv_release(struct inode *inode, struct file *filp)
{
gdr_info_t *info = filp->private_data;
gdr_mr_t *mr = NULL;
nvidia_p2p_page_table_t *page_table = NULL;
struct list_head *p, *n;
gdr_dbg("closing\n");
if (!info) {
gdr_err("filp contains no info\n");
return -EIO;
}
// Check that the caller is the same process that did gdrdrv_open
if (!gdrdrv_check_same_process(info, current)) {
gdr_dbg("filp is not opened by the current process\n");
return -EACCES;
}
mutex_lock(&info->lock);
list_for_each_safe(p, n, &info->mr_list) {
page_table = NULL;
mr = list_entry(p, gdr_mr_t, node);
down_write(&mr->sem);
gdr_info("freeing MR=0x%px\n", mr);
if (gdr_mr_is_mapped(mr)) {
gdr_mr_destroy_all_mappings(mr);
}
list_del(&mr->node);
gdr_free_mr_unlocked(mr);
}
mutex_unlock(&info->lock);
filp->f_mapping = NULL;
kfree(info);
filp->private_data = NULL;
return 0;
}
//-----------------------------------------------------------------------------
static gdr_mr_t *gdr_mr_from_handle_unlocked(gdr_info_t *info, gdr_hnd_t handle)
{
    // Return the matching mr, or NULL if the handle is not in the list.
    gdr_mr_t *mr = NULL;
    struct list_head *p;
    list_for_each(p, &info->mr_list) {
        gdr_mr_t *entry = list_entry(p, gdr_mr_t, node);
        gdr_dbg("mr->handle=0x%llx handle=0x%llx\n", entry->handle, handle);
        if (handle == entry->handle) {
            mr = entry;
            break;
        }
    }
    return mr;
}
/**
* Look up the mr for a handle and acquire its semaphore for read or write.
* On success, return the mr; it is guaranteed to stay valid until gdr_put_mr
* is called. Otherwise, return NULL.
*/
static inline gdr_mr_t *gdr_get_mr_from_handle(gdr_info_t *info, gdr_hnd_t handle, int write)
{
gdr_mr_t *mr;
mutex_lock(&info->lock);
mr = gdr_mr_from_handle_unlocked(info, handle);
if (mr) {
if (write)
down_write(&mr->sem);
else
down_read(&mr->sem);
}
mutex_unlock(&info->lock);
return mr;
}
#define gdr_get_mr_from_handle_read(info, handle) (gdr_get_mr_from_handle((info), (handle), 0))
#define gdr_get_mr_from_handle_write(info, handle) (gdr_get_mr_from_handle((info), (handle), 1))
//-----------------------------------------------------------------------------
/**
* Put the mr object. The `write` parameter must match the previous gdr_get_mr_from_handle call.
* After this function returns, mr may cease to exist (freed). It must not be accessed again.
*/
static inline void gdr_put_mr(gdr_mr_t *mr, int write)
{
if (write)
up_write(&mr->sem);
else
up_read(&mr->sem);
}
#define gdr_put_mr_read(mr) (gdr_put_mr((mr), 0))
#define gdr_put_mr_write(mr) (gdr_put_mr((mr), 1))
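/*
 * Typical usage sketch (this is the pattern used by gdrdrv_get_cb_flag below):
 *
 *   mr = gdr_get_mr_from_handle_read(info, handle);
 *   if (!mr)
 *       return -EINVAL;
 *   ... read-only access to mr ...
 *   gdr_put_mr_read(mr);
 */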
//-----------------------------------------------------------------------------
// off is host-page aligned because of the kernel mmap interface;
// the extra available bits could be abused for other purposes.
static gdr_hnd_t gdrdrv_handle_from_off(unsigned long off)
{
return (gdr_hnd_t)(off);
}
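/*
 * Example: user space mmaps this fd at offset (handle << PAGE_SHIFT); the
 * kernel then hands the driver vma->vm_pgoff == handle, so the offset
 * round-trips back to the handle unchanged.
 */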
//-----------------------------------------------------------------------------
typedef void (*gdr_free_callback_fn_t)(void *);
static void gdrdrv_get_pages_free_callback(void *data)
{
gdr_mr_t *mr = data;
nvidia_p2p_page_table_t *page_table = NULL;
gdr_info("free callback\n");
// can't take the info->lock here due to potential AB-BA
// deadlock with internal NV driver lock(s)
down_write(&mr->sem);
mr->cb_flag = 1;
page_table = mr->page_table;
if (page_table) {
nvidia_p2p_free_page_table(page_table);
if (gdr_mr_is_mapped(mr))
gdr_mr_destroy_all_mappings(mr);
} else {
gdr_dbg("free callback, page_table is NULL\n");
}
mr->page_table = NULL;
up_write(&mr->sem);
}
//-----------------------------------------------------------------------------
/**
* Generate mr->handle. This function should be called under info->lock.
*
* Prerequisite:
* - mr->mapped_size is set and rounded up to a multiple of max(PAGE_SIZE, GPU_PAGE_SIZE).
* - mr->sem must be under down_write before calling this function.
*
* Return 0 on success, -1 on failure.
*/
static inline int gdr_generate_mr_handle(gdr_info_t *info, gdr_mr_t *mr)
{
// The user-space library passes (handle << PAGE_SHIFT) as the mmap offset,
// and offsets are used to determine the VMAs to delete during invalidation.
// Hence, we need [(handle << PAGE_SHIFT), (handle << PAGE_SHIFT) + size - 1]
// to correspond to a unique VMA. Note that size here must match the
// original mmap size.
gdr_hnd_t next_handle;
WARN_ON(!mutex_is_locked(&info->lock));
// We have run out of handles, so fail.
if (unlikely(info->next_handle_overflow))
return -1;
next_handle = info->next_handle + MAX(1, mr->mapped_size >> PAGE_SHIFT);
// The next handle would overflow, so mark it.
if (unlikely((next_handle & ((gdr_hnd_t)(-1) >> PAGE_SHIFT)) < info->next_handle))
info->next_handle_overflow = 1;
mr->handle = info->next_handle;
info->next_handle = next_handle;
return 0;
}
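/*
 * Worked example: with 4KB host pages (PAGE_SHIFT == 12) and 64KB mappings,
 * mapped_size >> PAGE_SHIFT == 16, so consecutive registrations receive
 * handles 0, 16, 32, ..., and their mmap ranges
 * [handle << 12, (handle << 12) + size) never overlap.
 */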
//-----------------------------------------------------------------------------
static int __gdrdrv_pin_buffer(gdr_info_t *info, u64 addr, u64 size, u64 p2p_token, u32 va_space, gdr_hnd_t *p_handle)
{
int ret = 0;
struct nvidia_p2p_page_table *page_table = NULL;
u64 page_virt_start;
u64 page_virt_end;
size_t rounded_size;
gdr_mr_t *mr = NULL;
gdr_free_callback_fn_t free_callback_fn;
#ifndef CONFIG_ARM64
cycles_t ta, tb;
#endif
mr = kmalloc(sizeof(gdr_mr_t), GFP_KERNEL);
if (!mr) {
gdr_err("can't alloc kernel memory\n");
ret = -ENOMEM;
goto out;
}
memset(mr, 0, sizeof(*mr));
// Do proper alignment, as required by the NVIDIA driver: both addr and
// size must be aligned for the nvidia_p2p_get_pages* API.
page_virt_start = addr & GPU_PAGE_MASK;
page_virt_end = round_up((addr + size), GPU_PAGE_SIZE);
rounded_size = page_virt_end - page_virt_start;
init_rwsem(&mr->sem);
free_callback_fn = gdr_use_persistent_mapping() ? NULL : gdrdrv_get_pages_free_callback;
mr->offset = addr & GPU_PAGE_OFFSET;
if (free_callback_fn) {
mr->p2p_token = p2p_token;
mr->va_space = va_space;
} else {
// Token cannot be used with persistent mapping.
mr->p2p_token = 0;
mr->va_space = 0;
}
mr->va = page_virt_start;
mr->mapped_size = rounded_size;
mr->cpu_mapping_type = GDR_MR_NONE;
mr->page_table = NULL;
mr->cb_flag = 0;
#ifndef CONFIG_ARM64
ta = get_cycles();
#endif
// After nvidia_p2p_get_pages returns successfully, gdrdrv_get_pages_free_callback may be invoked at any time.
// mr setup must be done before calling that API. The memory barrier is included in down_write.
// We take this semaphore to prevent race with gdrdrv_get_pages_free_callback.
down_write(&mr->sem);
#ifdef NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
if (free_callback_fn) {
ret = nvidia_p2p_get_pages(mr->p2p_token, mr->va_space, mr->va, mr->mapped_size, &page_table,
free_callback_fn, mr);
gdr_info("invoking nvidia_p2p_get_pages(va=0x%llx len=%lld p2p_tok=%llx va_tok=%x callback=%px)\n",
mr->va, mr->mapped_size, mr->p2p_token, mr->va_space, free_callback_fn);
} else {
ret = nvidia_p2p_get_pages_persistent(mr->va, mr->mapped_size, &page_table, 0);
gdr_info("invoking nvidia_p2p_get_pages_persistent(va=0x%llx len=%lld)\n",
mr->va, mr->mapped_size);
}
#else
ret = nvidia_p2p_get_pages(mr->p2p_token, mr->va_space, mr->va, mr->mapped_size, &page_table,
free_callback_fn, mr);
gdr_info("invoking nvidia_p2p_get_pages(va=0x%llx len=%lld p2p_tok=%llx va_tok=%x callback=%px)\n",
mr->va, mr->mapped_size, mr->p2p_token, mr->va_space, free_callback_fn);
#endif
#ifndef CONFIG_ARM64
tb = get_cycles();
#endif
if (ret < 0) {
gdr_err("nvidia_p2p_get_pages(va=%llx len=%lld p2p_token=%llx va_space=%x callback=%px) failed [ret = %d]\n",
mr->va, mr->mapped_size, mr->p2p_token, mr->va_space, free_callback_fn, ret);
goto out;
}
mr->page_table = page_table;
#ifndef CONFIG_ARM64
mr->tm_cycles = tb - ta;
mr->tsc_khz = get_tsc_khz();
#endif
// check version before accessing page table
if (!NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(page_table)) {
gdr_err("incompatible page table version 0x%08x\n", page_table->version);
ret = -EFAULT;
goto out;
}
switch (page_table->page_size) {
case NVIDIA_P2P_PAGE_SIZE_4KB:
mr->page_size = 4*1024;
break;
case NVIDIA_P2P_PAGE_SIZE_64KB:
mr->page_size = 64*1024;
break;
case NVIDIA_P2P_PAGE_SIZE_128KB:
mr->page_size = 128*1024;
break;
default:
gdr_err("unexpected page_size\n");
ret = -EINVAL;
goto out;
}
// we are not really ready for a different page size
if (page_table->page_size != NVIDIA_P2P_PAGE_SIZE_64KB) {
gdr_err("nvidia_p2p_get_pages assumption of 64KB pages failed size_id=%d\n", page_table->page_size);
ret = -EINVAL;
goto out;
}
{
int i;
gdr_dbg("page table entries: %d\n", page_table->entries);
for (i = 0; i < MIN(20, page_table->entries); ++i) {
    gdr_dbg("page[%d]=0x%016llx%s\n", i, page_table->pages[i]->physical_address,
            (i == 19 && page_table->entries > 20) ? " and counting" : "");
}
}
// here a typical driver would use the page_table to fill in some HW
// DMA data structure
mutex_lock(&info->lock);
if (gdr_generate_mr_handle(info, mr) != 0) {
gdr_err("No address space left for BAR1 mapping.\n");
ret = -ENOMEM;
}
if (!ret) {
list_add(&mr->node, &info->mr_list);
*p_handle = mr->handle;
up_write(&mr->sem);
}
mutex_unlock(&info->lock);
out:
if (ret && mr) {
gdr_free_mr_unlocked(mr);
mr = NULL;
}
return ret;
}
//-----------------------------------------------------------------------------
static int __gdrdrv_unpin_buffer(gdr_info_t *info, gdr_hnd_t handle)
{
int ret = 0;
gdr_mr_t *mr = NULL;
// Someone might be traversing the list and/or operating on this mr
// concurrently, so take the lock here.
mutex_lock(&info->lock);
mr = gdr_mr_from_handle_unlocked(info, handle);
if (NULL == mr) {
gdr_err("unexpected handle %llx while unmapping buffer\n", handle);
ret = -EINVAL;
} else {
// Found the mr. Let's lock it.
down_write(&mr->sem);
if (gdr_mr_is_mapped(mr)) {
gdr_mr_destroy_all_mappings(mr);
}
// Remove this handle from the list under info->lock.
// Now the only race we need to care about is with gdrdrv_get_pages_free_callback.
list_del(&mr->node);
}
mutex_unlock(&info->lock);
if (ret)
goto out;
gdr_free_mr_unlocked(mr);
out:
return ret;
}
//-----------------------------------------------------------------------------
static int gdrdrv_pin_buffer(gdr_info_t *info, void __user *_params)
{
int ret = 0;
struct GDRDRV_IOC_PIN_BUFFER_PARAMS params = {0};
int has_handle = 0;
gdr_hnd_t handle;
if (copy_from_user(¶ms, _params, sizeof(params))) {
gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
ret = -EFAULT;
goto out;
}
if (!params.addr) {
gdr_err("NULL device pointer\n");
ret = -EINVAL;
goto out;
}
ret = __gdrdrv_pin_buffer(info, params.addr, params.size, params.p2p_token, params.va_space, &handle);
if (ret)
goto out;
has_handle = 1;
params.handle = handle;
if (copy_to_user(_params, ¶ms, sizeof(params))) {
gdr_err("copy_to_user failed on user pointer 0x%px\n", _params);
ret = -EFAULT;
}
out:
if (ret) {
if (has_handle)
__gdrdrv_unpin_buffer(info, handle);
}
return ret;
}
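/*
 * User-space sketch (a hedged illustration; it assumes the
 * GDRDRV_IOC_PIN_BUFFER ioctl command from gdrdrv.h and an open fd on the
 * gdrdrv device node; in practice the gdrapi library wraps this):
 *
 *   struct GDRDRV_IOC_PIN_BUFFER_PARAMS params = {0};
 *   params.addr = d_ptr;   // GPU virtual address, e.g. from cuMemAlloc
 *   params.size = size;
 *   if (ioctl(fd, GDRDRV_IOC_PIN_BUFFER, &params) < 0)
 *       perror("GDRDRV_IOC_PIN_BUFFER");
 *   // params.handle now identifies the pinned region for mmap/unpin.
 */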
//-----------------------------------------------------------------------------
static int gdrdrv_unpin_buffer(gdr_info_t *info, void __user *_params)
{
struct GDRDRV_IOC_UNPIN_BUFFER_PARAMS params = {0};
int ret = 0;
if (copy_from_user(¶ms, _params, sizeof(params))) {
gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
return -EFAULT;
}
ret = __gdrdrv_unpin_buffer(info, params.handle);
return ret;
}
//-----------------------------------------------------------------------------
static int gdrdrv_get_cb_flag(gdr_info_t *info, void __user *_params)
{
struct GDRDRV_IOC_GET_CB_FLAG_PARAMS params = {0};
int ret = 0;
gdr_mr_t *mr = NULL;
if (copy_from_user(¶ms, _params, sizeof(params))) {
gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
return -EFAULT;
}
mr = gdr_get_mr_from_handle_read(info, params.handle);
if (NULL == mr) {
gdr_err("unexpected handle %llx in get_cb_flag\n", params.handle);
ret = -EINVAL;
goto out;
}
params.flag = !!(mr->cb_flag);
gdr_put_mr_read(mr);
if (copy_to_user(_params, ¶ms, sizeof(params))) {
gdr_err("copy_to_user failed on user pointer 0x%px\n", _params);
ret = -EFAULT;
}
out:
return ret;
}
//-----------------------------------------------------------------------------
static int gdrdrv_get_info(gdr_info_t *info, void __user *_params)
{
struct GDRDRV_IOC_GET_INFO_PARAMS params = {0};
int ret = 0;
gdr_mr_t *mr = NULL;
if (copy_from_user(¶ms, _params, sizeof(params))) {
gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
ret = -EFAULT;
goto out;
}
mr = gdr_get_mr_from_handle_read(info, params.handle);
if (NULL == mr) {
gdr_err("unexpected handle %llx in get_cb_flag\n", params.handle);
ret = -EINVAL;
goto out;
}