// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/dma-mapping.h>
#include <asm/proc-fns.h>

/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte;
static DEFINE_RAW_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region	region;
 *    unsigned long	flags;
 *    struct page	**pages;
 *    unsigned int	nr_pages;
 *    unsigned long	phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
 *	.vm_start	= VMALLOC_START,
 *	.vm_end		= VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arch_vm_region {
	struct list_head vm_list;
	unsigned long vm_start;
	unsigned long vm_end;
	struct page *vm_pages;
};

static struct arch_vm_region consistent_head = {
	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start = CONSISTENT_BASE,
	.vm_end = CONSISTENT_END,
};
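
/*
 * Carve a 'size'-byte region out of the window described by 'head' using a
 * first-fit walk of the existing regions; the region list is protected by
 * consistent_lock.
 */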
static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
					      size_t size, gfp_t gfp)
{
	unsigned long addr = head->vm_start, end = head->vm_end - size;
	unsigned long flags;
	struct arch_vm_region *c, *new;

	new = kmalloc(sizeof(struct arch_vm_region), gfp);
	if (!new)
		goto out;

	raw_spin_lock_irqsave(&consistent_lock, flags);

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if ((addr + size) < addr)
			goto nospc;
		if ((addr + size) <= c->vm_start)
			goto found;
		addr = c->vm_end;
		if (addr > end)
			goto nospc;
	}

found:
	/*
	 * Insert this entry _before_ the one we found.
	 */
	list_add_tail(&new->vm_list, &c->vm_list);
	new->vm_start = addr;
	new->vm_end = addr + size;

	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	return new;

nospc:
	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	kfree(new);
out:
	return NULL;
}
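
/*
 * Look up the region whose start address matches 'addr'; the caller must
 * hold consistent_lock.
 */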
static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
					     unsigned long addr)
{
	struct arch_vm_region *c;

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if (c->vm_start == addr)
			return c;
	}
	return NULL;
}

/* FIXME: attrs is not used. */
static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *handle, gfp_t gfp,
				      unsigned long attrs)
{
	struct page *page;
	struct arch_vm_region *c;
	unsigned long order;
	u64 mask = ~0ULL, limit;
	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

	if (!consistent_pte) {
		pr_err("%s: not initialized\n", __func__);
		dump_stack();
		return NULL;
	}

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (!mask) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			goto no_page;
		}
	}

	/* Sanity check the allocation size. */
	size = PAGE_ALIGN(size);
	limit = (mask + 1) & ~mask;
	if ((limit && size >= limit) ||
	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
		pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
			size, mask);
		goto no_page;
	}

	order = get_order(size);

	if (mask != 0xffffffff)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		goto no_page;

	/*
	 * Invalidate any data that might be lurking in the
	 * kernel direct-mapped region for device DMA.
	 */
	{
		unsigned long kaddr = (unsigned long)page_address(page);

		memset(page_address(page), 0, size);
		cpu_dma_wbinval_range(kaddr, kaddr + size);
	}

	/* Allocate a virtual address in the consistent mapping region. */
	c = vm_region_alloc(&consistent_head, size,
			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
	if (c) {
		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
		struct page *end = page + (1 << order);

		c->vm_pages = page;

		/* Set the "dma handle" */
		*handle = page_to_phys(page);

		do {
			BUG_ON(!pte_none(*pte));

			/* x86 does not mark the pages reserved... */
			SetPageReserved(page);
			set_pte(pte, mk_pte(page, prot));
			page++;
			pte++;
		} while (size -= PAGE_SIZE);

		/* Free the otherwise unused pages. */
		while (page < end) {
			__free_page(page);
			page++;
		}

		return (void *)c->vm_start;
	}

	__free_pages(page, order);
no_page:
	return NULL;
}
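
/*
 * Tear down a coherent mapping set up by nds32_dma_alloc_coherent(): clear
 * the PTEs, return the pages to the allocator and release the vm region.
 */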
static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
			   dma_addr_t handle, unsigned long attrs)
{
	struct arch_vm_region *c;
	unsigned long flags, addr;
	pte_t *ptep;

	size = PAGE_ALIGN(size);

	raw_spin_lock_irqsave(&consistent_lock, flags);

	c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
	if (!c)
		goto no_area;

	if ((c->vm_end - c->vm_start) != size) {
		pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
		unsigned long pfn;

		ptep++;
		addr += PAGE_SIZE;

		if (!pte_none(pte) && pte_present(pte)) {
			pfn = pte_pfn(pte);

			if (pfn_valid(pfn)) {
				struct page *page = pfn_to_page(pfn);

				/* x86 does not mark the pages reserved... */
				ClearPageReserved(page);
				__free_page(page);
				continue;
			}
		}

		pr_crit("%s: bad page in kernel page table\n", __func__);
	} while (size -= PAGE_SIZE);

	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	list_del(&c->vm_list);

	raw_spin_unlock_irqrestore(&consistent_lock, flags);

	kfree(c);
	return;

no_area:
	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	pr_err("%s: trying to free invalid coherent area: %p\n",
	       __func__, cpu_addr);
	dump_stack();
}

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
	pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
	if (!pmd) {
		pr_err("%s: no pmd tables\n", __func__);
		return -ENOMEM;
	}
	/* The first-level mapping may already have been created elsewhere,
	 * so there is no need to warn here. */
	/* WARN_ON(!pmd_none(*pmd)); */

	pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
	if (!pte)
		return -ENOMEM;

	consistent_pte = pte;
	return 0;
}
core_initcall(consistent_init);

static void consistent_sync(void *vaddr, size_t size, int direction,
			    int master_type);
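
/*
 * Streaming mappings: nds32_dma_map_page() pushes the CPU's view of the
 * buffer out to memory before the device sees it, and nds32_dma_unmap_page()
 * drops stale cache lines once the device is done.  Both honour
 * DMA_ATTR_SKIP_CPU_SYNC.
 */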
static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		consistent_sync((void *)(page_address(page) + offset),
				size, dir, FOR_DEVICE);
	return page_to_phys(page) + offset;
}

static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU);
}

/*
 * Make an area consistent for devices.
 */
static void consistent_sync(void *vaddr, size_t size, int direction,
			    int master_type)
{
	unsigned long start = (unsigned long)vaddr;
	unsigned long end = start + size;

	if (master_type == FOR_CPU) {
		switch (direction) {
		case DMA_TO_DEVICE:
			break;
		case DMA_FROM_DEVICE:
		case DMA_BIDIRECTIONAL:
			cpu_dma_inval_range(start, end);
			break;
		default:
			BUG();
		}
	} else {
		/* FOR_DEVICE */
		switch (direction) {
		case DMA_FROM_DEVICE:
			break;
		case DMA_TO_DEVICE:
		case DMA_BIDIRECTIONAL:
			cpu_dma_wb_range(start, end);
			break;
		default:
			BUG();
		}
	}
}
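
/*
 * Map a scatterlist: record the bus address of every entry and synchronise
 * its CPU view, taking a temporary kmap_atomic() mapping for highmem pages.
 */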
static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
			    int nents, enum dma_data_direction dir,
			    unsigned long attrs)
{
	int i;

	for (i = 0; i < nents; i++, sg++) {
		void *virt;
		unsigned long pfn;
		struct page *page = sg_page(sg);

		sg->dma_address = sg_phys(sg);
		pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE;
		page = pfn_to_page(pfn);
		if (PageHighMem(page)) {
			virt = kmap_atomic(page);
			consistent_sync(virt, sg->length, dir, FOR_CPU);
			kunmap_atomic(virt);
		} else {
			if (sg->offset > PAGE_SIZE)
				panic("sg->offset:%08x > PAGE_SIZE\n",
				      sg->offset);
			virt = page_address(page) + sg->offset;
			consistent_sync(virt, sg->length, dir, FOR_CPU);
		}
	}
	return nents;
}

static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			       int nhwentries, enum dma_data_direction dir,
			       unsigned long attrs)
{
}
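
/*
 * dma_sync_single_for_{cpu,device} back-ends: translate the bus address to a
 * kernel virtual address and maintain the caches in the given direction.
 */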
static void
nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
			      size_t size, enum dma_data_direction dir)
{
	consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU);
}

static void
nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
				 size_t size, enum dma_data_direction dir)
{
	consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE);
}
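
/*
 * dma_sync_sg_for_{cpu,device} back-ends: walk the scatterlist and sync each
 * entry through its kernel mapping.
 */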
static void
nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
			  int nents, enum dma_data_direction dir)
{
	int i;

	for (i = 0; i < nents; i++, sg++)
		consistent_sync(sg_virt(sg), sg->length, dir, FOR_CPU);
}

static void
nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
			     int nents, enum dma_data_direction dir)
{
	int i;

	for (i = 0; i < nents; i++, sg++)
		consistent_sync(sg_virt(sg), sg->length, dir, FOR_DEVICE);
}
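
/*
 * The DMA operation table for this port; the generic dma_map_*() helpers
 * dispatch through these entries.
 */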
struct dma_map_ops nds32_dma_ops = {
	.alloc = nds32_dma_alloc_coherent,
	.free = nds32_dma_free,
	.map_page = nds32_dma_map_page,
	.unmap_page = nds32_dma_unmap_page,
	.map_sg = nds32_dma_map_sg,
	.unmap_sg = nds32_dma_unmap_sg,
	.sync_single_for_device = nds32_dma_sync_single_for_device,
	.sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
	.sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
	.sync_sg_for_device = nds32_dma_sync_sg_for_device,
};

EXPORT_SYMBOL(nds32_dma_ops);