nds32: implement the unmap_sg DMA operation
arch/nds32/kernel/dma.c
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/dma-mapping.h>
#include <asm/proc-fns.h>

/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations.
 */
static pte_t *consistent_pte;
static DEFINE_RAW_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region  region;
 *    unsigned long     flags;
 *    struct page       **pages;
 *    unsigned int      nr_pages;
 *    unsigned long     phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *      .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
 *      .vm_start       = VMALLOC_START,
 *      .vm_end         = VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arch_vm_region {
        struct list_head vm_list;
        unsigned long vm_start;
        unsigned long vm_end;
        struct page *vm_pages;
};

static struct arch_vm_region consistent_head = {
        .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
        .vm_start = CONSISTENT_BASE,
        .vm_end = CONSISTENT_END,
};

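/*
 * Carve a free range of 'size' bytes out of the consistent region described
 * by 'head', using a first-fit walk of the existing regions.  Returns the
 * new region on success, or NULL if the allocation fails or no gap is large
 * enough.  The region list is protected by consistent_lock.
 */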
static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
                                              size_t size, gfp_t gfp)
{
        unsigned long addr = head->vm_start, end = head->vm_end - size;
        unsigned long flags;
        struct arch_vm_region *c, *new;

        new = kmalloc(sizeof(struct arch_vm_region), gfp);
        if (!new)
                goto out;

        raw_spin_lock_irqsave(&consistent_lock, flags);

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if ((addr + size) < addr)
                        goto nospc;
                if ((addr + size) <= c->vm_start)
                        goto found;
                addr = c->vm_end;
                if (addr > end)
                        goto nospc;
        }

found:
        /*
         * Insert this entry _before_ the one we found.
         */
        list_add_tail(&new->vm_list, &c->vm_list);
        new->vm_start = addr;
        new->vm_end = addr + size;

        raw_spin_unlock_irqrestore(&consistent_lock, flags);
        return new;

nospc:
        raw_spin_unlock_irqrestore(&consistent_lock, flags);
        kfree(new);
out:
        return NULL;
}

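/*
 * Look up the region whose start address matches 'addr'.  Returns NULL if
 * no such region exists.  Callers must hold consistent_lock.
 */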
static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
                                             unsigned long addr)
{
        struct arch_vm_region *c;

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if (c->vm_start == addr)
                        goto out;
        }
        c = NULL;
out:
        return c;
}

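/*
 * Allocate DMA-coherent memory: grab pages from the page allocator, clean
 * and invalidate them in the cacheable linear mapping, then remap them
 * uncached through the consistent region and hand back that virtual address,
 * with the physical address of the pages as the DMA handle.
 */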
/* FIXME: attrs is not used. */
static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
                                      dma_addr_t *handle, gfp_t gfp,
                                      unsigned long attrs)
{
        struct page *page;
        struct arch_vm_region *c;
        unsigned long order;
        u64 mask = ~0ULL, limit;
        pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

        if (!consistent_pte) {
                pr_err("%s: not initialized\n", __func__);
                dump_stack();
                return NULL;
        }

        if (dev) {
                mask = dev->coherent_dma_mask;

                /*
                 * Sanity check the DMA mask - it must be non-zero, and
                 * must be able to be satisfied by a DMA allocation.
                 */
                if (mask == 0) {
                        dev_warn(dev, "coherent DMA mask is unset\n");
                        goto no_page;
                }
        }

        /*
         * Sanity check the allocation size.
         */
        size = PAGE_ALIGN(size);
        limit = (mask + 1) & ~mask;
        if ((limit && size >= limit) ||
            size >= (CONSISTENT_END - CONSISTENT_BASE)) {
                pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
                        size, mask);
                goto no_page;
        }

        order = get_order(size);

        if (mask != 0xffffffff)
                gfp |= GFP_DMA;

        page = alloc_pages(gfp, order);
        if (!page)
                goto no_page;

        /*
         * Invalidate any data that might be lurking in the
         * kernel direct-mapped region for device DMA.
         */
        {
                unsigned long kaddr = (unsigned long)page_address(page);
                memset(page_address(page), 0, size);
                cpu_dma_wbinval_range(kaddr, kaddr + size);
        }

        /*
         * Allocate a virtual address in the consistent mapping region.
         */
        c = vm_region_alloc(&consistent_head, size,
                            gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
        if (c) {
                pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
                struct page *end = page + (1 << order);

                c->vm_pages = page;

                /*
                 * Set the "dma handle"
                 */
                *handle = page_to_phys(page);

                do {
                        BUG_ON(!pte_none(*pte));

                        /*
                         * x86 does not mark the pages reserved...
                         */
                        SetPageReserved(page);
                        set_pte(pte, mk_pte(page, prot));
                        page++;
                        pte++;
                } while (size -= PAGE_SIZE);

                /*
                 * Free the otherwise unused pages.
                 */
                while (page < end) {
                        __free_page(page);
                        page++;
                }

                return (void *)c->vm_start;
        }

        if (page)
                __free_pages(page, order);
no_page:
        *handle = ~0;
        return NULL;
}

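/*
 * Free a DMA-coherent allocation: tear down the uncached PTEs, release the
 * underlying pages back to the page allocator, flush the TLB for the range
 * and return the virtual range to the consistent region allocator.
 */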
static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
                           dma_addr_t handle, unsigned long attrs)
{
        struct arch_vm_region *c;
        unsigned long flags, addr;
        pte_t *ptep;

        size = PAGE_ALIGN(size);

        raw_spin_lock_irqsave(&consistent_lock, flags);

        c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
        if (!c)
                goto no_area;

        if ((c->vm_end - c->vm_start) != size) {
                pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
                       __func__, c->vm_end - c->vm_start, size);
                dump_stack();
                size = c->vm_end - c->vm_start;
        }

        ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
        addr = c->vm_start;
        do {
                pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
                unsigned long pfn;

                ptep++;
                addr += PAGE_SIZE;

                if (!pte_none(pte) && pte_present(pte)) {
                        pfn = pte_pfn(pte);

                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);

                                /*
                                 * x86 does not mark the pages reserved...
                                 */
                                ClearPageReserved(page);

                                __free_page(page);
                                continue;
                        }
                }

                pr_crit("%s: bad page in kernel page table\n", __func__);
        } while (size -= PAGE_SIZE);

        flush_tlb_kernel_range(c->vm_start, c->vm_end);

        list_del(&c->vm_list);

        raw_spin_unlock_irqrestore(&consistent_lock, flags);

        kfree(c);
        return;

no_area:
        raw_spin_unlock_irqrestore(&consistent_lock, flags);
        pr_err("%s: trying to free invalid coherent area: %p\n",
               __func__, cpu_addr);
        dump_stack();
}

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int ret = 0;

        do {
                pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
                pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
                if (!pmd) {
                        pr_err("%s: no pmd tables\n", __func__);
                        ret = -ENOMEM;
                        break;
                }
                /*
                 * The first-level mapping may already have been created
                 * elsewhere, so there is no need to warn here.
                 */
                /* WARN_ON(!pmd_none(*pmd)); */

                pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
                if (!pte) {
                        ret = -ENOMEM;
                        break;
                }

                consistent_pte = pte;
        } while (0);

        return ret;
}

core_initcall(consistent_init);

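/*
 * Apply the given cache maintenance routine to the physical range
 * [paddr, paddr + size).  Lowmem pages are reached through the linear
 * mapping; highmem pages are temporarily mapped with kmap_atomic() and
 * processed one page at a time.
 */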
static inline void cache_op(phys_addr_t paddr, size_t size,
                void (*fn)(unsigned long start, unsigned long end))
{
        struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
        unsigned offset = paddr & ~PAGE_MASK;
        size_t left = size;
        unsigned long start;

        do {
                size_t len = left;

                if (PageHighMem(page)) {
                        void *addr;

                        if (offset + len > PAGE_SIZE) {
                                if (offset >= PAGE_SIZE) {
                                        page += offset >> PAGE_SHIFT;
                                        offset &= ~PAGE_MASK;
                                }
                                len = PAGE_SIZE - offset;
                        }

                        addr = kmap_atomic(page);
                        start = (unsigned long)(addr + offset);
                        fn(start, start + len);
                        kunmap_atomic(addr);
                } else {
                        start = (unsigned long)phys_to_virt(paddr);
                        fn(start, start + size);
                }
                offset = 0;
                page++;
                left -= len;
        } while (left);
}

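/*
 * Make CPU-written data visible to the device: for DMA_TO_DEVICE and
 * DMA_BIDIRECTIONAL transfers the affected range is written back from the
 * cache; nothing needs to be done for DMA_FROM_DEVICE.
 */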
static void
nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
                                 size_t size, enum dma_data_direction dir)
{
        switch (dir) {
        case DMA_FROM_DEVICE:
                break;
        case DMA_TO_DEVICE:
        case DMA_BIDIRECTIONAL:
                cache_op(handle, size, cpu_dma_wb_range);
                break;
        default:
                BUG();
        }
}

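/*
 * Make device-written data visible to the CPU: for DMA_FROM_DEVICE and
 * DMA_BIDIRECTIONAL transfers any stale cache lines covering the range are
 * invalidated; nothing needs to be done for DMA_TO_DEVICE.
 */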
static void
nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
                              size_t size, enum dma_data_direction dir)
{
        switch (dir) {
        case DMA_TO_DEVICE:
                break;
        case DMA_FROM_DEVICE:
        case DMA_BIDIRECTIONAL:
                cache_op(handle, size, cpu_dma_inval_range);
                break;
        default:
                BUG();
        }
}

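/*
 * Map a single page for streaming DMA.  There is no IOMMU, so the DMA
 * address is simply the physical address of the page plus the offset; the
 * only work required is the cache maintenance for the device, which can be
 * skipped with DMA_ATTR_SKIP_CPU_SYNC.
 */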
static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        dma_addr_t dma_addr = page_to_phys(page) + offset;

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                nds32_dma_sync_single_for_device(dev, dma_addr, size, dir);
        return dma_addr;
}

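/*
 * Unmap a single page.  No mapping state needs to be torn down; only the
 * CPU-side cache maintenance is performed, unless the caller asked for it
 * to be skipped with DMA_ATTR_SKIP_CPU_SYNC.
 */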
static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
                                 size_t size, enum dma_data_direction dir,
                                 unsigned long attrs)
{
        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                nds32_dma_sync_single_for_cpu(dev, handle, size, dir);
}

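/*
 * Scatterlist variants of the sync operations: walk the list and apply the
 * single-entry sync to each segment.
 */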
static void
nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                             int nents, enum dma_data_direction dir)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                nds32_dma_sync_single_for_device(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
}

static void
nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
                          enum dma_data_direction dir)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                nds32_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
}

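/*
 * Map a scatterlist for streaming DMA.  With no IOMMU the DMA address of
 * each segment is its physical address, so mapping reduces to filling in
 * the DMA addresses and performing the cache maintenance for the device.
 */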
static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
                            int nents, enum dma_data_direction dir,
                            unsigned long attrs)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                /* Direct mapping: the DMA address is the physical address. */
                sg->dma_address = sg_phys(sg);
                nds32_dma_sync_single_for_device(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
        return nents;
}

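/*
 * Unmap a scatterlist: no mapping state needs to be torn down, so only the
 * CPU-side cache maintenance is performed for each segment.
 */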
static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
                               int nhwentries, enum dma_data_direction dir,
                               unsigned long attrs)
{
        int i;

        for (i = 0; i < nhwentries; i++, sg++) {
                nds32_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
}

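/*
 * The DMA mapping operations for the nds32 architecture; exported so that
 * modules performing DMA can reach them through the DMA mapping API.
 */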
struct dma_map_ops nds32_dma_ops = {
        .alloc = nds32_dma_alloc_coherent,
        .free = nds32_dma_free,
        .map_page = nds32_dma_map_page,
        .unmap_page = nds32_dma_unmap_page,
        .map_sg = nds32_dma_map_sg,
        .unmap_sg = nds32_dma_unmap_sg,
        .sync_single_for_device = nds32_dma_sync_single_for_device,
        .sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
        .sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
        .sync_sg_for_device = nds32_dma_sync_sg_for_device,
};

EXPORT_SYMBOL(nds32_dma_ops);