1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <asm/processor.h>
37 #include "kfd_priv.h"
38 #include "kfd_device_queue_manager.h"
39 #include "kfd_dbgmgr.h"
40
41 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
42 static int kfd_open(struct inode *, struct file *);
43 static int kfd_mmap(struct file *, struct vm_area_struct *);
44
45 static const char kfd_dev_name[] = "kfd";
46
47 static const struct file_operations kfd_fops = {
48         .owner = THIS_MODULE,
49         .unlocked_ioctl = kfd_ioctl,
50         .compat_ioctl = kfd_ioctl,
51         .open = kfd_open,
52         .mmap = kfd_mmap,
53 };
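/*
 * Note: compat_ioctl points at the same handler as unlocked_ioctl. The KFD
 * ioctl argument structs use fixed-size fields, so no 32-bit translation is
 * needed, and kfd_open() below rejects 32-bit processes in any case.
 */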
54
55 static int kfd_char_dev_major = -1;
56 static struct class *kfd_class;
57 struct device *kfd_device;
58
59 int kfd_chardev_init(void)
60 {
61         int err = 0;
62
63         kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
64         err = kfd_char_dev_major;
65         if (err < 0)
66                 goto err_register_chrdev;
67
68         kfd_class = class_create(THIS_MODULE, kfd_dev_name);
69         err = PTR_ERR(kfd_class);
70         if (IS_ERR(kfd_class))
71                 goto err_class_create;
72
73         kfd_device = device_create(kfd_class, NULL,
74                                         MKDEV(kfd_char_dev_major, 0),
75                                         NULL, kfd_dev_name);
76         err = PTR_ERR(kfd_device);
77         if (IS_ERR(kfd_device))
78                 goto err_device_create;
79
80         return 0;
81
82 err_device_create:
83         class_destroy(kfd_class);
84 err_class_create:
85         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
86 err_register_chrdev:
87         return err;
88 }
89
90 void kfd_chardev_exit(void)
91 {
92         device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
93         class_destroy(kfd_class);
94         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
95 }
96
97 struct device *kfd_chardev(void)
98 {
99         return kfd_device;
100 }
101
102
103 static int kfd_open(struct inode *inode, struct file *filep)
104 {
105         struct kfd_process *process;
106         bool is_32bit_user_mode;
107
108         if (iminor(inode) != 0)
109                 return -ENODEV;
110
111         is_32bit_user_mode = in_compat_syscall();
112
113         if (is_32bit_user_mode) {
114                 dev_warn(kfd_device,
115                         "Process %d (32-bit) failed to open /dev/kfd\n"
116                         "32-bit processes are not supported by amdkfd\n",
117                         current->pid);
118                 return -EPERM;
119         }
120
121         process = kfd_create_process(filep);
122         if (IS_ERR(process))
123                 return PTR_ERR(process);
124
125         dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
126                 process->pasid, process->is_32bit_user_mode);
127
128         return 0;
129 }
130
131 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
132                                         void *data)
133 {
134         struct kfd_ioctl_get_version_args *args = data;
135
136         args->major_version = KFD_IOCTL_MAJOR_VERSION;
137         args->minor_version = KFD_IOCTL_MINOR_VERSION;
138
139         return 0;
140 }
141
142 static int set_queue_properties_from_user(struct queue_properties *q_properties,
143                                 struct kfd_ioctl_create_queue_args *args)
144 {
145         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
146                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
147                 return -EINVAL;
148         }
149
150         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
151                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
152                 return -EINVAL;
153         }
154
155         if ((args->ring_base_address) &&
156                 (!access_ok(VERIFY_WRITE,
157                         (const void __user *) args->ring_base_address,
158                         sizeof(uint64_t)))) {
159                 pr_err("Can't access ring base address\n");
160                 return -EFAULT;
161         }
162
163         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
164                 pr_err("Ring size must be a power of 2 or 0\n");
165                 return -EINVAL;
166         }
167
168         if (!access_ok(VERIFY_WRITE,
169                         (const void __user *) args->read_pointer_address,
170                         sizeof(uint32_t))) {
171                 pr_err("Can't access read pointer\n");
172                 return -EFAULT;
173         }
174
175         if (!access_ok(VERIFY_WRITE,
176                         (const void __user *) args->write_pointer_address,
177                         sizeof(uint32_t))) {
178                 pr_err("Can't access write pointer\n");
179                 return -EFAULT;
180         }
181
182         if (args->eop_buffer_address &&
183                 !access_ok(VERIFY_WRITE,
184                         (const void __user *) args->eop_buffer_address,
185                         sizeof(uint32_t))) {
186                 pr_debug("Can't access eop buffer\n");
187                 return -EFAULT;
188         }
189
190         if (args->ctx_save_restore_address &&
191                 !access_ok(VERIFY_WRITE,
192                         (const void __user *) args->ctx_save_restore_address,
193                         sizeof(uint32_t))) {
194                 pr_debug("Can't access ctx save restore buffer\n");
195                 return -EFAULT;
196         }
197
198         q_properties->is_interop = false;
199         q_properties->queue_percent = args->queue_percentage;
200         q_properties->priority = args->queue_priority;
201         q_properties->queue_address = args->ring_base_address;
202         q_properties->queue_size = args->ring_size;
203         q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
204         q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
205         q_properties->eop_ring_buffer_address = args->eop_buffer_address;
206         q_properties->eop_ring_buffer_size = args->eop_buffer_size;
207         q_properties->ctx_save_restore_area_address =
208                         args->ctx_save_restore_address;
209         q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
210         q_properties->ctl_stack_size = args->ctl_stack_size;
211         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
212                 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
213                 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
214         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
215                 q_properties->type = KFD_QUEUE_TYPE_SDMA;
216         else
217                 return -ENOTSUPP;
218
219         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
220                 q_properties->format = KFD_QUEUE_FORMAT_AQL;
221         else
222                 q_properties->format = KFD_QUEUE_FORMAT_PM4;
223
224         pr_debug("Queue Percentage: %d, %d\n",
225                         q_properties->queue_percent, args->queue_percentage);
226
227         pr_debug("Queue Priority: %d, %d\n",
228                         q_properties->priority, args->queue_priority);
229
230         pr_debug("Queue Address: 0x%llX, 0x%llX\n",
231                         q_properties->queue_address, args->ring_base_address);
232
233         pr_debug("Queue Size: 0x%llX, %u\n",
234                         q_properties->queue_size, args->ring_size);
235
236         pr_debug("Queue r/w Pointers: %p, %p\n",
237                         q_properties->read_ptr,
238                         q_properties->write_ptr);
239
240         pr_debug("Queue Format: %d\n", q_properties->format);
241
242         pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
243
244         pr_debug("Queue CTX save area: 0x%llX\n",
245                         q_properties->ctx_save_restore_area_address);
246
247         return 0;
248 }
249
250 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
251                                         void *data)
252 {
253         struct kfd_ioctl_create_queue_args *args = data;
254         struct kfd_dev *dev;
255         int err = 0;
256         unsigned int queue_id;
257         struct kfd_process_device *pdd;
258         struct queue_properties q_properties;
259
260         memset(&q_properties, 0, sizeof(struct queue_properties));
261
262         pr_debug("Creating queue ioctl\n");
263
264         err = set_queue_properties_from_user(&q_properties, args);
265         if (err)
266                 return err;
267
268         pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
269         dev = kfd_device_by_id(args->gpu_id);
270         if (!dev) {
271                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
272                 return -EINVAL;
273         }
274
275         mutex_lock(&p->mutex);
276
277         pdd = kfd_bind_process_to_device(dev, p);
278         if (IS_ERR(pdd)) {
279                 err = -ESRCH;
280                 goto err_bind_process;
281         }
282
283         pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
284                         p->pasid,
285                         dev->id);
286
287         err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
288         if (err != 0)
289                 goto err_create_queue;
290
291         args->queue_id = queue_id;
292
293
294         /* Return gpu_id as doorbell offset for mmap usage */
295         args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
296         args->doorbell_offset <<= PAGE_SHIFT;
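        /*
         * Descriptive note: the offset returned to user space encodes
         * KFD_MMAP_DOORBELL_MASK together with the gpu_id in the page offset.
         * When user space later mmap()s /dev/kfd with this offset, kfd_mmap()
         * below recognizes the mask in vm_pgoff and routes the request to
         * kfd_doorbell_mmap().
         */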
297
298         mutex_unlock(&p->mutex);
299
300         pr_debug("Queue id %d was created successfully\n", args->queue_id);
301
302         pr_debug("Ring buffer address == 0x%016llX\n",
303                         args->ring_base_address);
304
305         pr_debug("Read ptr address    == 0x%016llX\n",
306                         args->read_pointer_address);
307
308         pr_debug("Write ptr address   == 0x%016llX\n",
309                         args->write_pointer_address);
310
311         return 0;
312
313 err_create_queue:
314 err_bind_process:
315         mutex_unlock(&p->mutex);
316         return err;
317 }
318
319 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
320                                         void *data)
321 {
322         int retval;
323         struct kfd_ioctl_destroy_queue_args *args = data;
324
325         pr_debug("Destroying queue id %d for pasid %d\n",
326                                 args->queue_id,
327                                 p->pasid);
328
329         mutex_lock(&p->mutex);
330
331         retval = pqm_destroy_queue(&p->pqm, args->queue_id);
332
333         mutex_unlock(&p->mutex);
334         return retval;
335 }
336
337 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
338                                         void *data)
339 {
340         int retval;
341         struct kfd_ioctl_update_queue_args *args = data;
342         struct queue_properties properties;
343
344         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
345                 pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
346                 return -EINVAL;
347         }
348
349         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
350                 pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
351                 return -EINVAL;
352         }
353
354         if ((args->ring_base_address) &&
355                 (!access_ok(VERIFY_WRITE,
356                         (const void __user *) args->ring_base_address,
357                         sizeof(uint64_t)))) {
358                 pr_err("Can't access ring base address\n");
359                 return -EFAULT;
360         }
361
362         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
363                 pr_err("Ring size must be a power of 2 or 0\n");
364                 return -EINVAL;
365         }
366
367         properties.queue_address = args->ring_base_address;
368         properties.queue_size = args->ring_size;
369         properties.queue_percent = args->queue_percentage;
370         properties.priority = args->queue_priority;
371
372         pr_debug("Updating queue id %d for pasid %d\n",
373                         args->queue_id, p->pasid);
374
375         mutex_lock(&p->mutex);
376
377         retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
378
379         mutex_unlock(&p->mutex);
380
381         return retval;
382 }
383
384 static int kfd_ioctl_set_memory_policy(struct file *filep,
385                                         struct kfd_process *p, void *data)
386 {
387         struct kfd_ioctl_set_memory_policy_args *args = data;
388         struct kfd_dev *dev;
389         int err = 0;
390         struct kfd_process_device *pdd;
391         enum cache_policy default_policy, alternate_policy;
392
393         if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
394             && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
395                 return -EINVAL;
396         }
397
398         if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
399             && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
400                 return -EINVAL;
401         }
402
403         dev = kfd_device_by_id(args->gpu_id);
404         if (!dev)
405                 return -EINVAL;
406
407         mutex_lock(&p->mutex);
408
409         pdd = kfd_bind_process_to_device(dev, p);
410         if (IS_ERR(pdd)) {
411                 err = -ESRCH;
412                 goto out;
413         }
414
415         default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
416                          ? cache_policy_coherent : cache_policy_noncoherent;
417
418         alternate_policy =
419                 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
420                    ? cache_policy_coherent : cache_policy_noncoherent;
421
422         if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
423                                 &pdd->qpd,
424                                 default_policy,
425                                 alternate_policy,
426                                 (void __user *)args->alternate_aperture_base,
427                                 args->alternate_aperture_size))
428                 err = -EINVAL;
429
430 out:
431         mutex_unlock(&p->mutex);
432
433         return err;
434 }
435
436 static int kfd_ioctl_set_trap_handler(struct file *filep,
437                                         struct kfd_process *p, void *data)
438 {
439         struct kfd_ioctl_set_trap_handler_args *args = data;
440         struct kfd_dev *dev;
441         int err = 0;
442         struct kfd_process_device *pdd;
443
444         dev = kfd_device_by_id(args->gpu_id);
445         if (dev == NULL)
446                 return -EINVAL;
447
448         mutex_lock(&p->mutex);
449
450         pdd = kfd_bind_process_to_device(dev, p);
451         if (IS_ERR(pdd)) {
452                 err = -ESRCH;
453                 goto out;
454         }
455
456         if (dev->dqm->ops.set_trap_handler(dev->dqm,
457                                         &pdd->qpd,
458                                         args->tba_addr,
459                                         args->tma_addr))
460                 err = -EINVAL;
461
462 out:
463         mutex_unlock(&p->mutex);
464
465         return err;
466 }
467
468 static int kfd_ioctl_dbg_register(struct file *filep,
469                                 struct kfd_process *p, void *data)
470 {
471         struct kfd_ioctl_dbg_register_args *args = data;
472         struct kfd_dev *dev;
473         struct kfd_dbgmgr *dbgmgr_ptr;
474         struct kfd_process_device *pdd;
475         bool create_ok;
476         long status = 0;
477
478         dev = kfd_device_by_id(args->gpu_id);
479         if (!dev)
480                 return -EINVAL;
481
482         if (dev->device_info->asic_family == CHIP_CARRIZO) {
483                 pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
484                 return -EINVAL;
485         }
486
487         mutex_lock(&p->mutex);
488         mutex_lock(kfd_get_dbgmgr_mutex());
489
490         /*
491          * Make sure that we have a pdd, in case this is the first queue
492          * created for this process.
493          */
494         pdd = kfd_bind_process_to_device(dev, p);
495         if (IS_ERR(pdd)) {
496                 status = PTR_ERR(pdd);
497                 goto out;
498         }
499
500         if (!dev->dbgmgr) {
501                 /* For a valid call, there is no dbgmgr registered yet */
502                 create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
503                 if (create_ok) {
504                         status = kfd_dbgmgr_register(dbgmgr_ptr, p);
505                         if (status != 0)
506                                 kfd_dbgmgr_destroy(dbgmgr_ptr);
507                         else
508                                 dev->dbgmgr = dbgmgr_ptr;
509                 }
510         } else {
511                 pr_debug("debugger already registered\n");
512                 status = -EINVAL;
513         }
514
515 out:
516         mutex_unlock(kfd_get_dbgmgr_mutex());
517         mutex_unlock(&p->mutex);
518
519         return status;
520 }
521
522 static int kfd_ioctl_dbg_unregister(struct file *filep,
523                                 struct kfd_process *p, void *data)
524 {
525         struct kfd_ioctl_dbg_unregister_args *args = data;
526         struct kfd_dev *dev;
527         long status;
528
529         dev = kfd_device_by_id(args->gpu_id);
530         if (!dev || !dev->dbgmgr)
531                 return -EINVAL;
532
533         if (dev->device_info->asic_family == CHIP_CARRIZO) {
534                 pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
535                 return -EINVAL;
536         }
537
538         mutex_lock(kfd_get_dbgmgr_mutex());
539
540         status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
541         if (!status) {
542                 kfd_dbgmgr_destroy(dev->dbgmgr);
543                 dev->dbgmgr = NULL;
544         }
545
546         mutex_unlock(kfd_get_dbgmgr_mutex());
547
548         return status;
549 }
550
551 /*
552  * Parse and generate variable size data structure for address watch.
553  * The total buffer size and the number of watch points are limited in order
554  * to prevent kernel abuse. (This has no bearing on the much smaller HW
555  * limitation, which is enforced by the dbgdev module.)
556  * Note also that the watch addresses themselves are not "copied from user",
557  * since they are programmed into the HW as user-mode values.
558  *
559  */
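/*
 * Layout of args_buff as implied by the parsing below: a watch point count,
 * an array of HSA_DBG_WATCH_MODE values, an array of watch addresses, and
 * either an array of watch masks or a single zero entry meaning "no masks".
 * The entry strides are taken from the sizes of the aw_info members used.
 */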
560 static int kfd_ioctl_dbg_address_watch(struct file *filep,
561                                         struct kfd_process *p, void *data)
562 {
563         struct kfd_ioctl_dbg_address_watch_args *args = data;
564         struct kfd_dev *dev;
565         struct dbg_address_watch_info aw_info;
566         unsigned char *args_buff;
567         long status;
568         void __user *cmd_from_user;
569         uint64_t watch_mask_value = 0;
570         unsigned int args_idx = 0;
571
572         memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
573
574         dev = kfd_device_by_id(args->gpu_id);
575         if (!dev)
576                 return -EINVAL;
577
578         if (dev->device_info->asic_family == CHIP_CARRIZO) {
579                 pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
580                 return -EINVAL;
581         }
582
583         cmd_from_user = (void __user *) args->content_ptr;
584
585         /* Validate arguments */
586
587         if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
588                 (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
589                 (cmd_from_user == NULL))
590                 return -EINVAL;
591
592         /* this is the actual buffer to work with */
593         args_buff = memdup_user(cmd_from_user,
594                                 args->buf_size_in_bytes - sizeof(*args));
595         if (IS_ERR(args_buff))
596                 return PTR_ERR(args_buff);
597
598         aw_info.process = p;
599
600         aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
601         args_idx += sizeof(aw_info.num_watch_points);
602
603         aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
604         args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
605
606         /*
607          * Set the watch address base pointer to point at the array base
608          * within args_buff.
609          */
610         aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
611
612         /* skip over the addresses buffer */
613         args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
614
615         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
616                 status = -EINVAL;
617                 goto out;
618         }
619
620         watch_mask_value = (uint64_t) args_buff[args_idx];
621
622         if (watch_mask_value > 0) {
623                 /*
624                  * There is an array of masks.
625                  * Set the watch mask base pointer to point at the array base
626                  * within args_buff.
627                  */
628                 aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
629
630                 /* skip over the masks buffer */
631                 args_idx += sizeof(aw_info.watch_mask) *
632                                 aw_info.num_watch_points;
633         } else {
634                 /* just the NULL mask, set to NULL and skip over it */
635                 aw_info.watch_mask = NULL;
636                 args_idx += sizeof(aw_info.watch_mask);
637         }
638
639         if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
640                 status = -EINVAL;
641                 goto out;
642         }
643
644         /* Currently HSA Event is not supported for DBG */
645         aw_info.watch_event = NULL;
646
647         mutex_lock(kfd_get_dbgmgr_mutex());
648
649         status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
650
651         mutex_unlock(kfd_get_dbgmgr_mutex());
652
653 out:
654         kfree(args_buff);
655
656         return status;
657 }
658
659 /* Parse and generate fixed size data structure for wave control */
660 static int kfd_ioctl_dbg_wave_control(struct file *filep,
661                                         struct kfd_process *p, void *data)
662 {
663         struct kfd_ioctl_dbg_wave_control_args *args = data;
664         struct kfd_dev *dev;
665         struct dbg_wave_control_info wac_info;
666         unsigned char *args_buff;
667         uint32_t computed_buff_size;
668         long status;
669         void __user *cmd_from_user;
670         unsigned int args_idx = 0;
671
672         memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
673
674         /* we use compact form, independent of the packing attribute value */
675         computed_buff_size = sizeof(*args) +
676                                 sizeof(wac_info.mode) +
677                                 sizeof(wac_info.operand) +
678                                 sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
679                                 sizeof(wac_info.dbgWave_msg.MemoryVA) +
680                                 sizeof(wac_info.trapId);
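        /*
         * Descriptive note: the "compact" payload following the args struct is
         * parsed below as operand (HSA_DBG_WAVEOP), mode (HSA_DBG_WAVEMODE),
         * trapId (u32) and the DbgWaveMsg Value (u32); MemoryVA is not passed in.
         */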
681
682         dev = kfd_device_by_id(args->gpu_id);
683         if (!dev)
684                 return -EINVAL;
685
686         if (dev->device_info->asic_family == CHIP_CARRIZO) {
687                 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
688                 return -EINVAL;
689         }
690
691         /* input size must match the computed "compact" size */
692         if (args->buf_size_in_bytes != computed_buff_size) {
693                 pr_debug("size mismatch, computed : actual %u : %u\n",
694                                 args->buf_size_in_bytes, computed_buff_size);
695                 return -EINVAL;
696         }
697
698         cmd_from_user = (void __user *) args->content_ptr;
699
700         if (cmd_from_user == NULL)
701                 return -EINVAL;
702
703         /* copy the entire buffer from user */
704
705         args_buff = memdup_user(cmd_from_user,
706                                 args->buf_size_in_bytes - sizeof(*args));
707         if (IS_ERR(args_buff))
708                 return PTR_ERR(args_buff);
709
710         /* move ptr to the start of the "payload" area */
711         wac_info.process = p;
712
713         wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
714         args_idx += sizeof(wac_info.operand);
715
716         wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
717         args_idx += sizeof(wac_info.mode);
718
719         wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
720         args_idx += sizeof(wac_info.trapId);
721
722         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
723                                         *((uint32_t *)(&args_buff[args_idx]));
724         wac_info.dbgWave_msg.MemoryVA = NULL;
725
726         mutex_lock(kfd_get_dbgmgr_mutex());
727
728         pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
729                         wac_info.process, wac_info.operand,
730                         wac_info.mode, wac_info.trapId,
731                         wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
732
733         status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
734
735         pr_debug("Returned status of dbg manager is %ld\n", status);
736
737         mutex_unlock(kfd_get_dbgmgr_mutex());
738
739         kfree(args_buff);
740
741         return status;
742 }
743
744 static int kfd_ioctl_get_clock_counters(struct file *filep,
745                                 struct kfd_process *p, void *data)
746 {
747         struct kfd_ioctl_get_clock_counters_args *args = data;
748         struct kfd_dev *dev;
749         struct timespec64 time;
750
751         dev = kfd_device_by_id(args->gpu_id);
752         if (dev)
753                 /* Reading GPU clock counter from KGD */
754                 args->gpu_clock_counter =
755                         dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
756         else
757                 /* Node without GPU resource */
758                 args->gpu_clock_counter = 0;
759
760         /* No access to rdtsc. Using raw monotonic time */
761         getrawmonotonic64(&time);
762         args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);
763
764         get_monotonic_boottime64(&time);
765         args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
766
767         /* Since the counter is in nano-seconds we use 1GHz frequency */
768         args->system_clock_freq = 1000000000;
769
770         return 0;
771 }
772
773
774 static int kfd_ioctl_get_process_apertures(struct file *filp,
775                                 struct kfd_process *p, void *data)
776 {
777         struct kfd_ioctl_get_process_apertures_args *args = data;
778         struct kfd_process_device_apertures *pAperture;
779         struct kfd_process_device *pdd;
780
781         dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
782
783         args->num_of_nodes = 0;
784
785         mutex_lock(&p->mutex);
786
787         /* if the process-device list isn't empty */
788         if (kfd_has_process_device_data(p)) {
789                 /* Run over all pdd of the process */
790                 pdd = kfd_get_first_process_device_data(p);
791                 do {
792                         pAperture =
793                                 &args->process_apertures[args->num_of_nodes];
794                         pAperture->gpu_id = pdd->dev->id;
795                         pAperture->lds_base = pdd->lds_base;
796                         pAperture->lds_limit = pdd->lds_limit;
797                         pAperture->gpuvm_base = pdd->gpuvm_base;
798                         pAperture->gpuvm_limit = pdd->gpuvm_limit;
799                         pAperture->scratch_base = pdd->scratch_base;
800                         pAperture->scratch_limit = pdd->scratch_limit;
801
802                         dev_dbg(kfd_device,
803                                 "node id %u\n", args->num_of_nodes);
804                         dev_dbg(kfd_device,
805                                 "gpu id %u\n", pdd->dev->id);
806                         dev_dbg(kfd_device,
807                                 "lds_base %llX\n", pdd->lds_base);
808                         dev_dbg(kfd_device,
809                                 "lds_limit %llX\n", pdd->lds_limit);
810                         dev_dbg(kfd_device,
811                                 "gpuvm_base %llX\n", pdd->gpuvm_base);
812                         dev_dbg(kfd_device,
813                                 "gpuvm_limit %llX\n", pdd->gpuvm_limit);
814                         dev_dbg(kfd_device,
815                                 "scratch_base %llX\n", pdd->scratch_base);
816                         dev_dbg(kfd_device,
817                                 "scratch_limit %llX\n", pdd->scratch_limit);
818
819                         args->num_of_nodes++;
820
821                         pdd = kfd_get_next_process_device_data(p, pdd);
822                 } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
823         }
824
825         mutex_unlock(&p->mutex);
826
827         return 0;
828 }
829
830 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
831                                 struct kfd_process *p, void *data)
832 {
833         struct kfd_ioctl_get_process_apertures_new_args *args = data;
834         struct kfd_process_device_apertures *pa;
835         struct kfd_process_device *pdd;
836         uint32_t nodes = 0;
837         int ret;
838
839         dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
840
841         if (args->num_of_nodes == 0) {
842                 /* Return the number of nodes, so that user space can allocate
843                  * sufficient memory
844                  */
845                 mutex_lock(&p->mutex);
846
847                 if (!kfd_has_process_device_data(p))
848                         goto out_unlock;
849
850                 /* Run over all pdd of the process */
851                 pdd = kfd_get_first_process_device_data(p);
852                 do {
853                         args->num_of_nodes++;
854                         pdd = kfd_get_next_process_device_data(p, pdd);
855                 } while (pdd);
856
857                 goto out_unlock;
858         }
859
860         /* Fill in process-aperture information for all available
861          * nodes, but not more than args->num_of_nodes as that is
862          * the amount of memory allocated by user
863          */
864         pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
865                                 args->num_of_nodes), GFP_KERNEL);
866         if (!pa)
867                 return -ENOMEM;
868
869         mutex_lock(&p->mutex);
870
871         if (!kfd_has_process_device_data(p)) {
872                 args->num_of_nodes = 0;
873                 kfree(pa);
874                 goto out_unlock;
875         }
876
877         /* Run over all pdd of the process */
878         pdd = kfd_get_first_process_device_data(p);
879         do {
880                 pa[nodes].gpu_id = pdd->dev->id;
881                 pa[nodes].lds_base = pdd->lds_base;
882                 pa[nodes].lds_limit = pdd->lds_limit;
883                 pa[nodes].gpuvm_base = pdd->gpuvm_base;
884                 pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
885                 pa[nodes].scratch_base = pdd->scratch_base;
886                 pa[nodes].scratch_limit = pdd->scratch_limit;
887
888                 dev_dbg(kfd_device,
889                         "gpu id %u\n", pdd->dev->id);
890                 dev_dbg(kfd_device,
891                         "lds_base %llX\n", pdd->lds_base);
892                 dev_dbg(kfd_device,
893                         "lds_limit %llX\n", pdd->lds_limit);
894                 dev_dbg(kfd_device,
895                         "gpuvm_base %llX\n", pdd->gpuvm_base);
896                 dev_dbg(kfd_device,
897                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
898                 dev_dbg(kfd_device,
899                         "scratch_base %llX\n", pdd->scratch_base);
900                 dev_dbg(kfd_device,
901                         "scratch_limit %llX\n", pdd->scratch_limit);
902                 nodes++;
903
904                 pdd = kfd_get_next_process_device_data(p, pdd);
905         } while (pdd && (nodes < args->num_of_nodes));
906         mutex_unlock(&p->mutex);
907
908         args->num_of_nodes = nodes;
909         ret = copy_to_user(
910                         (void __user *)args->kfd_process_device_apertures_ptr,
911                         pa,
912                         (nodes * sizeof(struct kfd_process_device_apertures)));
913         kfree(pa);
914         return ret ? -EFAULT : 0;
915
916 out_unlock:
917         mutex_unlock(&p->mutex);
918         return 0;
919 }
920
921 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
922                                         void *data)
923 {
924         struct kfd_ioctl_create_event_args *args = data;
925         int err;
926
927         /* For dGPUs the event page is allocated in user mode. The
928          * handle is passed to KFD with the first call to this IOCTL
929          * through the event_page_offset field.
930          */
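        /*
         * Descriptive note: in that case event_page_offset packs a gpu_id and
         * an IDR buffer handle, which are unpacked below with GET_GPU_ID() and
         * GET_IDR_HANDLE() to locate the BO backing the signal page.
         */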
931         if (args->event_page_offset) {
932                 struct kfd_dev *kfd;
933                 struct kfd_process_device *pdd;
934                 void *mem, *kern_addr;
935                 uint64_t size;
936
937                 if (p->signal_page) {
938                         pr_err("Event page is already set\n");
939                         return -EINVAL;
940                 }
941
942                 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
943                 if (!kfd) {
944                         pr_err("Getting device by id failed in %s\n", __func__);
945                         return -EINVAL;
946                 }
947
948                 mutex_lock(&p->mutex);
949                 pdd = kfd_bind_process_to_device(kfd, p);
950                 if (IS_ERR(pdd)) {
951                         err = PTR_ERR(pdd);
952                         goto out_unlock;
953                 }
954
955                 mem = kfd_process_device_translate_handle(pdd,
956                                 GET_IDR_HANDLE(args->event_page_offset));
957                 if (!mem) {
958                         pr_err("Can't find BO, offset is 0x%llx\n",
959                                args->event_page_offset);
960                         err = -EINVAL;
961                         goto out_unlock;
962                 }
963                 mutex_unlock(&p->mutex);
964
965                 err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
966                                                 mem, &kern_addr, &size);
967                 if (err) {
968                         pr_err("Failed to map event page to kernel\n");
969                         return err;
970                 }
971
972                 err = kfd_event_page_set(p, kern_addr, size);
973                 if (err) {
974                         pr_err("Failed to set event page\n");
975                         return err;
976                 }
977         }
978
979         err = kfd_event_create(filp, p, args->event_type,
980                                 args->auto_reset != 0, args->node_id,
981                                 &args->event_id, &args->event_trigger_data,
982                                 &args->event_page_offset,
983                                 &args->event_slot_index);
984
985         return err;
986
987 out_unlock:
988         mutex_unlock(&p->mutex);
989         return err;
990 }
991
992 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
993                                         void *data)
994 {
995         struct kfd_ioctl_destroy_event_args *args = data;
996
997         return kfd_event_destroy(p, args->event_id);
998 }
999
1000 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1001                                 void *data)
1002 {
1003         struct kfd_ioctl_set_event_args *args = data;
1004
1005         return kfd_set_event(p, args->event_id);
1006 }
1007
1008 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1009                                 void *data)
1010 {
1011         struct kfd_ioctl_reset_event_args *args = data;
1012
1013         return kfd_reset_event(p, args->event_id);
1014 }
1015
1016 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1017                                 void *data)
1018 {
1019         struct kfd_ioctl_wait_events_args *args = data;
1020         int err;
1021
1022         err = kfd_wait_on_events(p, args->num_events,
1023                         (void __user *)args->events_ptr,
1024                         (args->wait_for_all != 0),
1025                         args->timeout, &args->wait_result);
1026
1027         return err;
1028 }
1029 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1030                                         struct kfd_process *p, void *data)
1031 {
1032         struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1033         struct kfd_process_device *pdd;
1034         struct kfd_dev *dev;
1035         long err;
1036
1037         dev = kfd_device_by_id(args->gpu_id);
1038         if (!dev)
1039                 return -EINVAL;
1040
1041         mutex_lock(&p->mutex);
1042
1043         pdd = kfd_bind_process_to_device(dev, p);
1044         if (IS_ERR(pdd)) {
1045                 err = PTR_ERR(pdd);
1046                 goto bind_process_to_device_fail;
1047         }
1048
1049         pdd->qpd.sh_hidden_private_base = args->va_addr;
1050
1051         mutex_unlock(&p->mutex);
1052
1053         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1054             pdd->qpd.vmid != 0)
1055                 dev->kfd2kgd->set_scratch_backing_va(
1056                         dev->kgd, args->va_addr, pdd->qpd.vmid);
1057
1058         return 0;
1059
1060 bind_process_to_device_fail:
1061         mutex_unlock(&p->mutex);
1062         return err;
1063 }
1064
1065 static int kfd_ioctl_get_tile_config(struct file *filep,
1066                 struct kfd_process *p, void *data)
1067 {
1068         struct kfd_ioctl_get_tile_config_args *args = data;
1069         struct kfd_dev *dev;
1070         struct tile_config config;
1071         int err = 0;
1072
1073         dev = kfd_device_by_id(args->gpu_id);
1074         if (!dev)
1075                 return -EINVAL;
1076
1077         dev->kfd2kgd->get_tile_config(dev->kgd, &config);
1078
1079         args->gb_addr_config = config.gb_addr_config;
1080         args->num_banks = config.num_banks;
1081         args->num_ranks = config.num_ranks;
1082
1083         if (args->num_tile_configs > config.num_tile_configs)
1084                 args->num_tile_configs = config.num_tile_configs;
1085         err = copy_to_user((void __user *)args->tile_config_ptr,
1086                         config.tile_config_ptr,
1087                         args->num_tile_configs * sizeof(uint32_t));
1088         if (err) {
1089                 args->num_tile_configs = 0;
1090                 return -EFAULT;
1091         }
1092
1093         if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1094                 args->num_macro_tile_configs =
1095                                 config.num_macro_tile_configs;
1096         err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1097                         config.macro_tile_config_ptr,
1098                         args->num_macro_tile_configs * sizeof(uint32_t));
1099         if (err) {
1100                 args->num_macro_tile_configs = 0;
1101                 return -EFAULT;
1102         }
1103
1104         return 0;
1105 }
1106
1107 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1108                                 void *data)
1109 {
1110         struct kfd_ioctl_acquire_vm_args *args = data;
1111         struct kfd_process_device *pdd;
1112         struct kfd_dev *dev;
1113         struct file *drm_file;
1114         int ret;
1115
1116         dev = kfd_device_by_id(args->gpu_id);
1117         if (!dev)
1118                 return -EINVAL;
1119
1120         drm_file = fget(args->drm_fd);
1121         if (!drm_file)
1122                 return -EINVAL;
1123
1124         mutex_lock(&p->mutex);
1125
1126         pdd = kfd_get_process_device_data(dev, p);
1127         if (!pdd) {
1128                 ret = -EINVAL;
1129                 goto err_unlock;
1130         }
1131
1132         if (pdd->drm_file) {
1133                 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1134                 goto err_unlock;
1135         }
1136
1137         ret = kfd_process_device_init_vm(pdd, drm_file);
1138         if (ret)
1139                 goto err_unlock;
1140         /* On success, the PDD keeps the drm_file reference */
1141         mutex_unlock(&p->mutex);
1142
1143         return 0;
1144
1145 err_unlock:
1146         mutex_unlock(&p->mutex);
1147         fput(drm_file);
1148         return ret;
1149 }
1150
1151 static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1152 {
1153         struct kfd_local_mem_info mem_info;
1154
1155         if (debug_largebar) {
1156                 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1157                 return true;
1158         }
1159
1160         if (dev->device_info->needs_iommu_device)
1161                 return false;
1162
1163         dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
1164         if (mem_info.local_mem_size_private == 0 &&
1165                         mem_info.local_mem_size_public > 0)
1166                 return true;
1167         return false;
1168 }
1169
1170 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1171                                         struct kfd_process *p, void *data)
1172 {
1173         struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1174         struct kfd_process_device *pdd;
1175         void *mem;
1176         struct kfd_dev *dev;
1177         int idr_handle;
1178         long err;
1179         uint64_t offset = args->mmap_offset;
1180         uint32_t flags = args->flags;
1181
1182         if (args->size == 0)
1183                 return -EINVAL;
1184
1185         dev = kfd_device_by_id(args->gpu_id);
1186         if (!dev)
1187                 return -EINVAL;
1188
1189         if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1190                 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1191                 !kfd_dev_is_large_bar(dev)) {
1192                 pr_err("Alloc host visible vram on small bar is not allowed\n");
1193                 return -EINVAL;
1194         }
1195
1196         mutex_lock(&p->mutex);
1197
1198         pdd = kfd_bind_process_to_device(dev, p);
1199         if (IS_ERR(pdd)) {
1200                 err = PTR_ERR(pdd);
1201                 goto err_unlock;
1202         }
1203
1204         err = dev->kfd2kgd->alloc_memory_of_gpu(
1205                 dev->kgd, args->va_addr, args->size,
1206                 pdd->vm, (struct kgd_mem **) &mem, &offset,
1207                 flags);
1208
1209         if (err)
1210                 goto err_unlock;
1211
1212         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1213         if (idr_handle < 0) {
1214                 err = -EFAULT;
1215                 goto err_free;
1216         }
1217
1218         mutex_unlock(&p->mutex);
1219
1220         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1221         args->mmap_offset = offset;
1222
1223         return 0;
1224
1225 err_free:
1226         dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1227 err_unlock:
1228         mutex_unlock(&p->mutex);
1229         return err;
1230 }
1231
1232 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1233                                         struct kfd_process *p, void *data)
1234 {
1235         struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1236         struct kfd_process_device *pdd;
1237         void *mem;
1238         struct kfd_dev *dev;
1239         int ret;
1240
1241         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1242         if (!dev)
1243                 return -EINVAL;
1244
1245         mutex_lock(&p->mutex);
1246
1247         pdd = kfd_get_process_device_data(dev, p);
1248         if (!pdd) {
1249                 pr_err("Process device data doesn't exist\n");
1250                 ret = -EINVAL;
1251                 goto err_unlock;
1252         }
1253
1254         mem = kfd_process_device_translate_handle(
1255                 pdd, GET_IDR_HANDLE(args->handle));
1256         if (!mem) {
1257                 ret = -EINVAL;
1258                 goto err_unlock;
1259         }
1260
1261         ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1262
1263         /* If freeing the buffer failed, leave the handle in place for
1264          * clean-up during process tear-down.
1265          */
1266         if (!ret)
1267                 kfd_process_device_remove_obj_handle(
1268                         pdd, GET_IDR_HANDLE(args->handle));
1269
1270 err_unlock:
1271         mutex_unlock(&p->mutex);
1272         return ret;
1273 }
1274
1275 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1276                                         struct kfd_process *p, void *data)
1277 {
1278         struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1279         struct kfd_process_device *pdd, *peer_pdd;
1280         void *mem;
1281         struct kfd_dev *dev, *peer;
1282         long err = 0;
1283         int i;
1284         uint32_t *devices_arr = NULL;
1285
1286         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1287         if (!dev)
1288                 return -EINVAL;
1289
1290         if (!args->n_devices) {
1291                 pr_debug("Device IDs array empty\n");
1292                 return -EINVAL;
1293         }
1294         if (args->n_success > args->n_devices) {
1295                 pr_debug("n_success exceeds n_devices\n");
1296                 return -EINVAL;
1297         }
1298
1299         devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
1300                               GFP_KERNEL);
1301         if (!devices_arr)
1302                 return -ENOMEM;
1303
1304         err = copy_from_user(devices_arr,
1305                              (void __user *)args->device_ids_array_ptr,
1306                              args->n_devices * sizeof(*devices_arr));
1307         if (err != 0) {
1308                 err = -EFAULT;
1309                 goto copy_from_user_failed;
1310         }
1311
1312         mutex_lock(&p->mutex);
1313
1314         pdd = kfd_bind_process_to_device(dev, p);
1315         if (IS_ERR(pdd)) {
1316                 err = PTR_ERR(pdd);
1317                 goto bind_process_to_device_failed;
1318         }
1319
1320         mem = kfd_process_device_translate_handle(pdd,
1321                                                 GET_IDR_HANDLE(args->handle));
1322         if (!mem) {
1323                 err = -ENOMEM;
1324                 goto get_mem_obj_from_handle_failed;
1325         }
1326
1327         for (i = args->n_success; i < args->n_devices; i++) {
1328                 peer = kfd_device_by_id(devices_arr[i]);
1329                 if (!peer) {
1330                         pr_debug("Getting device by id failed for 0x%x\n",
1331                                  devices_arr[i]);
1332                         err = -EINVAL;
1333                         goto get_mem_obj_from_handle_failed;
1334                 }
1335
1336                 peer_pdd = kfd_bind_process_to_device(peer, p);
1337                 if (IS_ERR(peer_pdd)) {
1338                         err = PTR_ERR(peer_pdd);
1339                         goto get_mem_obj_from_handle_failed;
1340                 }
1341                 err = peer->kfd2kgd->map_memory_to_gpu(
1342                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1343                 if (err) {
1344                         pr_err("Failed to map to gpu %d/%d\n",
1345                                i, args->n_devices);
1346                         goto map_memory_to_gpu_failed;
1347                 }
1348                 args->n_success = i+1;
1349         }
1350
1351         mutex_unlock(&p->mutex);
1352
1353         err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1354         if (err) {
1355                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1356                 goto sync_memory_failed;
1357         }
1358
1359         /* Flush TLBs after waiting for the page table updates to complete */
1360         for (i = 0; i < args->n_devices; i++) {
1361                 peer = kfd_device_by_id(devices_arr[i]);
1362                 if (WARN_ON_ONCE(!peer))
1363                         continue;
1364                 peer_pdd = kfd_get_process_device_data(peer, p);
1365                 if (WARN_ON_ONCE(!peer_pdd))
1366                         continue;
1367                 kfd_flush_tlb(peer_pdd);
1368         }
1369
1370         kfree(devices_arr);
1371
1372         return err;
1373
1374 bind_process_to_device_failed:
1375 get_mem_obj_from_handle_failed:
1376 map_memory_to_gpu_failed:
1377         mutex_unlock(&p->mutex);
1378 copy_from_user_failed:
1379 sync_memory_failed:
1380         kfree(devices_arr);
1381
1382         return err;
1383 }
1384
1385 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1386                                         struct kfd_process *p, void *data)
1387 {
1388         struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1389         struct kfd_process_device *pdd, *peer_pdd;
1390         void *mem;
1391         struct kfd_dev *dev, *peer;
1392         long err = 0;
1393         uint32_t *devices_arr = NULL, i;
1394
1395         dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1396         if (!dev)
1397                 return -EINVAL;
1398
1399         if (!args->n_devices) {
1400                 pr_debug("Device IDs array empty\n");
1401                 return -EINVAL;
1402         }
1403         if (args->n_success > args->n_devices) {
1404                 pr_debug("n_success exceeds n_devices\n");
1405                 return -EINVAL;
1406         }
1407
1408         devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
1409                               GFP_KERNEL);
1410         if (!devices_arr)
1411                 return -ENOMEM;
1412
1413         err = copy_from_user(devices_arr,
1414                              (void __user *)args->device_ids_array_ptr,
1415                              args->n_devices * sizeof(*devices_arr));
1416         if (err != 0) {
1417                 err = -EFAULT;
1418                 goto copy_from_user_failed;
1419         }
1420
1421         mutex_lock(&p->mutex);
1422
1423         pdd = kfd_get_process_device_data(dev, p);
1424         if (!pdd) {
1425                 err = -EINVAL;
1426                 goto bind_process_to_device_failed;
1427         }
1428
1429         mem = kfd_process_device_translate_handle(pdd,
1430                                                 GET_IDR_HANDLE(args->handle));
1431         if (!mem) {
1432                 err = -ENOMEM;
1433                 goto get_mem_obj_from_handle_failed;
1434         }
1435
1436         for (i = args->n_success; i < args->n_devices; i++) {
1437                 peer = kfd_device_by_id(devices_arr[i]);
1438                 if (!peer) {
1439                         err = -EINVAL;
1440                         goto get_mem_obj_from_handle_failed;
1441                 }
1442
1443                 peer_pdd = kfd_get_process_device_data(peer, p);
1444                 if (!peer_pdd) {
1445                         err = -ENODEV;
1446                         goto get_mem_obj_from_handle_failed;
1447                 }
1448                 err = dev->kfd2kgd->unmap_memory_to_gpu(
1449                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1450                 if (err) {
1451                         pr_err("Failed to unmap from gpu %d/%d\n",
1452                                i, args->n_devices);
1453                         goto unmap_memory_from_gpu_failed;
1454                 }
1455                 args->n_success = i+1;
1456         }
1457         kfree(devices_arr);
1458
1459         mutex_unlock(&p->mutex);
1460
1461         return 0;
1462
1463 bind_process_to_device_failed:
1464 get_mem_obj_from_handle_failed:
1465 unmap_memory_from_gpu_failed:
1466         mutex_unlock(&p->mutex);
1467 copy_from_user_failed:
1468         kfree(devices_arr);
1469         return err;
1470 }
1471
1472 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1473         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1474                             .cmd_drv = 0, .name = #ioctl}
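/*
 * Note: the table below is indexed by _IOC_NR(ioctl), so kfd_ioctl() can look
 * up the handler directly from the nr field of an incoming command.
 */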
1475
1476 /** Ioctl table */
1477 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1478         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1479                         kfd_ioctl_get_version, 0),
1480
1481         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1482                         kfd_ioctl_create_queue, 0),
1483
1484         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1485                         kfd_ioctl_destroy_queue, 0),
1486
1487         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1488                         kfd_ioctl_set_memory_policy, 0),
1489
1490         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1491                         kfd_ioctl_get_clock_counters, 0),
1492
1493         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1494                         kfd_ioctl_get_process_apertures, 0),
1495
1496         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1497                         kfd_ioctl_update_queue, 0),
1498
1499         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1500                         kfd_ioctl_create_event, 0),
1501
1502         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1503                         kfd_ioctl_destroy_event, 0),
1504
1505         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1506                         kfd_ioctl_set_event, 0),
1507
1508         AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1509                         kfd_ioctl_reset_event, 0),
1510
1511         AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1512                         kfd_ioctl_wait_events, 0),
1513
1514         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1515                         kfd_ioctl_dbg_register, 0),
1516
1517         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1518                         kfd_ioctl_dbg_unregister, 0),
1519
1520         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1521                         kfd_ioctl_dbg_address_watch, 0),
1522
1523         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1524                         kfd_ioctl_dbg_wave_control, 0),
1525
1526         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1527                         kfd_ioctl_set_scratch_backing_va, 0),
1528
1529         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1530                         kfd_ioctl_get_tile_config, 0),
1531
1532         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1533                         kfd_ioctl_set_trap_handler, 0),
1534
1535         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1536                         kfd_ioctl_get_process_apertures_new, 0),
1537
1538         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1539                         kfd_ioctl_acquire_vm, 0),
1540
1541         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1542                         kfd_ioctl_alloc_memory_of_gpu, 0),
1543
1544         AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1545                         kfd_ioctl_free_memory_of_gpu, 0),
1546
1547         AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1548                         kfd_ioctl_map_memory_to_gpu, 0),
1549
1550         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1551                         kfd_ioctl_unmap_memory_from_gpu, 0),
1552
1553 };
1554
1555 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1556
1557 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1558 {
1559         struct kfd_process *process;
1560         amdkfd_ioctl_t *func;
1561         const struct amdkfd_ioctl_desc *ioctl = NULL;
1562         unsigned int nr = _IOC_NR(cmd);
1563         char stack_kdata[128];
1564         char *kdata = NULL;
1565         unsigned int usize, asize;
1566         int retcode = -EINVAL;
1567
1568         if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1569                 goto err_i1;
1570
1571         if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1572                 u32 amdkfd_size;
1573
1574                 ioctl = &amdkfd_ioctls[nr];
1575
1576                 amdkfd_size = _IOC_SIZE(ioctl->cmd);
1577                 usize = asize = _IOC_SIZE(cmd);
1578                 if (amdkfd_size > asize)
1579                         asize = amdkfd_size;
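                /*
                 * usize is the size user space encoded in its ioctl number;
                 * asize is at least the size of the kernel's definition. Any
                 * extra bytes are zeroed before the handler runs (see the
                 * memset further down).
                 */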
1580
1581                 cmd = ioctl->cmd;
1582         } else
1583                 goto err_i1;
1584
1585         dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
1586
1587         process = kfd_get_process(current);
1588         if (IS_ERR(process)) {
1589                 dev_dbg(kfd_device, "no process\n");
1590                 goto err_i1;
1591         }
1592
1593         /* Do not trust userspace, use our own definition */
1594         func = ioctl->func;
1595
1596         if (unlikely(!func)) {
1597                 dev_dbg(kfd_device, "no function\n");
1598                 retcode = -EINVAL;
1599                 goto err_i1;
1600         }
1601
1602         if (cmd & (IOC_IN | IOC_OUT)) {
1603                 if (asize <= sizeof(stack_kdata)) {
1604                         kdata = stack_kdata;
1605                 } else {
1606                         kdata = kmalloc(asize, GFP_KERNEL);
1607                         if (!kdata) {
1608                                 retcode = -ENOMEM;
1609                                 goto err_i1;
1610                         }
1611                 }
1612                 if (asize > usize)
1613                         memset(kdata + usize, 0, asize - usize);
1614         }
1615
1616         if (cmd & IOC_IN) {
1617                 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1618                         retcode = -EFAULT;
1619                         goto err_i1;
1620                 }
1621         } else if (cmd & IOC_OUT) {
1622                 memset(kdata, 0, usize);
1623         }
1624
1625         retcode = func(filep, process, kdata);
1626
1627         if (cmd & IOC_OUT)
1628                 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1629                         retcode = -EFAULT;
1630
1631 err_i1:
1632         if (!ioctl)
1633                 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1634                           task_pid_nr(current), cmd, nr);
1635
1636         if (kdata != stack_kdata)
1637                 kfree(kdata);
1638
1639         if (retcode)
1640                 dev_dbg(kfd_device, "ret = %d\n", retcode);
1641
1642         return retcode;
1643 }
1644
1645 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1646 {
1647         struct kfd_process *process;
1648
1649         process = kfd_get_process(current);
1650         if (IS_ERR(process))
1651                 return PTR_ERR(process);
1652
1653         if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
1654                         KFD_MMAP_DOORBELL_MASK) {
1655                 vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
1656                 return kfd_doorbell_mmap(process, vma);
1657         } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
1658                         KFD_MMAP_EVENTS_MASK) {
1659                 vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
1660                 return kfd_event_mmap(process, vma);
1661         } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
1662                         KFD_MMAP_RESERVED_MEM_MASK) {
1663                 vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
1664                 return kfd_reserved_mem_mmap(process, vma);
1665         }
1666
1667         return -EFAULT;
1668 }
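
/*
 * Illustrative user-space sketch (not part of the driver): assuming the uapi
 * header <linux/kfd_ioctl.h> is installed, the simplest interaction with this
 * character device looks roughly like:
 *
 *      int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *      struct kfd_ioctl_get_version_args args = {0};
 *
 *      if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *              printf("KFD ioctl ABI %u.%u\n",
 *                     args.major_version, args.minor_version);
 *
 * The open flags are an assumption here; kfd_ioctl_get_version() above simply
 * fills in the two version fields.
 */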