// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/dma-mapping.h>
#include <asm/proc-fns.h>

/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte;
static DEFINE_RAW_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region	region;
 *    unsigned long	flags;
 *    struct page	**pages;
 *    unsigned int	nr_pages;
 *    unsigned long	phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
 *	.vm_start	= VMALLOC_START,
 *	.vm_end		= VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arch_vm_region {
	struct list_head vm_list;
	unsigned long vm_start;
	unsigned long vm_end;
	struct page *vm_pages;
};

static struct arch_vm_region consistent_head = {
	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start = CONSISTENT_BASE,
	.vm_end = CONSISTENT_END,
};
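
/*
 * Carve a 'size'-byte region out of the window described by 'head' using a
 * first-fit walk of the existing regions; the region list is protected by
 * consistent_lock.
 */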
static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
					      size_t size, gfp_t gfp)
{
	unsigned long addr = head->vm_start, end = head->vm_end - size;
	unsigned long flags;
	struct arch_vm_region *c, *new;

	new = kmalloc(sizeof(struct arch_vm_region), gfp);
	if (!new)
		goto out;

	raw_spin_lock_irqsave(&consistent_lock, flags);

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if ((addr + size) < addr)
			goto nospc;
		if ((addr + size) <= c->vm_start)
			goto found;
		addr = c->vm_end;
		if (addr > end)
			goto nospc;
	}

found:
	/*
	 * Insert this entry _before_ the one we found.
	 */
	list_add_tail(&new->vm_list, &c->vm_list);
	new->vm_start = addr;
	new->vm_end = addr + size;

	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	return new;

nospc:
	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	kfree(new);
out:
	return NULL;
}
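
/*
 * Look up the region whose start address matches 'addr'; the caller must
 * hold consistent_lock.
 */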
static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
					     unsigned long addr)
{
	struct arch_vm_region *c;

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if (c->vm_start == addr)
			return c;
	}
	return NULL;
}

/* FIXME: attrs is not used. */
static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *handle, gfp_t gfp,
				      unsigned long attrs)
{
	struct page *page;
	struct arch_vm_region *c;
	unsigned long order;
	u64 mask = ~0ULL, limit;
	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

	if (!consistent_pte) {
		pr_err("%s: not initialized\n", __func__);
		dump_stack();
		return NULL;
	}

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (!mask) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			goto no_page;
		}
	}

	/* Sanity check the allocation size. */
	size = PAGE_ALIGN(size);
	limit = (mask + 1) & ~mask;
	if ((limit && size >= limit) ||
	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
		pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
			size, mask);
		goto no_page;
	}

	order = get_order(size);

	if (mask != 0xffffffff)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		goto no_page;

	/*
	 * Invalidate any data that might be lurking in the
	 * kernel direct-mapped region for device DMA.
	 */
	{
		unsigned long kaddr = (unsigned long)page_address(page);

		memset(page_address(page), 0, size);
		cpu_dma_wbinval_range(kaddr, kaddr + size);
	}

	/* Allocate a virtual address in the consistent mapping region. */
	c = vm_region_alloc(&consistent_head, size,
			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
	if (c) {
		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
		struct page *end = page + (1 << order);

		c->vm_pages = page;

		/* Set the "dma handle" */
		*handle = page_to_phys(page);

		do {
			BUG_ON(!pte_none(*pte));

			/* x86 does not mark the pages reserved... */
			SetPageReserved(page);
			set_pte(pte, mk_pte(page, prot));
			page++;
			pte++;
		} while (size -= PAGE_SIZE);

		/* Free the otherwise unused pages. */
		while (page < end) {
			__free_page(page);
			page++;
		}

		return (void *)c->vm_start;
	}

	__free_pages(page, order);
no_page:
	return NULL;
}
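
/*
 * Tear down a coherent mapping set up by nds32_dma_alloc_coherent(): clear
 * the PTEs, return the pages to the allocator and release the vm region.
 */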
static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
			   dma_addr_t handle, unsigned long attrs)
{
	struct arch_vm_region *c;
	unsigned long flags, addr;
	pte_t *ptep;

	size = PAGE_ALIGN(size);

	raw_spin_lock_irqsave(&consistent_lock, flags);

	c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
	if (!c)
		goto no_area;

	if ((c->vm_end - c->vm_start) != size) {
		pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
		unsigned long pfn;

		ptep++;
		addr += PAGE_SIZE;

		if (!pte_none(pte) && pte_present(pte)) {
			pfn = pte_pfn(pte);

			if (pfn_valid(pfn)) {
				struct page *page = pfn_to_page(pfn);

				/* x86 does not mark the pages reserved... */
				ClearPageReserved(page);
				__free_page(page);
				continue;
			}
		}

		pr_crit("%s: bad page in kernel page table\n", __func__);
	} while (size -= PAGE_SIZE);

	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	list_del(&c->vm_list);

	raw_spin_unlock_irqrestore(&consistent_lock, flags);

	kfree(c);
	return;

no_area:
	raw_spin_unlock_irqrestore(&consistent_lock, flags);
	pr_err("%s: trying to free invalid coherent area: %p\n",
	       __func__, cpu_addr);
	dump_stack();
}

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
	pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
	if (!pmd) {
		pr_err("%s: no pmd tables\n", __func__);
		return -ENOMEM;
	}
	/* The first-level mapping may already have been created elsewhere,
	 * so there is no need to warn here. */
	/* WARN_ON(!pmd_none(*pmd)); */

	pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
	if (!pte)
		return -ENOMEM;

	consistent_pte = pte;
	return 0;
}
core_initcall(consistent_init);

static void consistent_sync(void *vaddr, size_t size, int direction,
			    int master_type);
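
/*
 * Streaming mappings: nds32_dma_map_page() pushes the CPU's view of the
 * buffer out to memory before the device sees it, and nds32_dma_unmap_page()
 * drops stale cache lines once the device is done.  Both honour
 * DMA_ATTR_SKIP_CPU_SYNC.
 */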
static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		consistent_sync((void *)(page_address(page) + offset),
				size, dir, FOR_DEVICE);
	return page_to_phys(page) + offset;
}

static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU);
}

/*
 * Make an area consistent for devices.
 */
static void consistent_sync(void *vaddr, size_t size, int direction,
			    int master_type)
{
	unsigned long start = (unsigned long)vaddr;
	unsigned long end = start + size;

	if (master_type == FOR_CPU) {
		switch (direction) {
		case DMA_TO_DEVICE:
			break;
		case DMA_FROM_DEVICE:
		case DMA_BIDIRECTIONAL:
			cpu_dma_inval_range(start, end);
			break;
		default:
			BUG();
		}
	} else {
		/* FOR_DEVICE */
		switch (direction) {
		case DMA_FROM_DEVICE:
			break;
		case DMA_TO_DEVICE:
		case DMA_BIDIRECTIONAL:
			cpu_dma_wb_range(start, end);
			break;
		default:
			BUG();
		}
	}
}
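
/*
 * Map a scatterlist: record the bus address of every entry and synchronise
 * its CPU view, taking a temporary kmap_atomic() mapping for highmem pages.
 */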
static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
			    int nents, enum dma_data_direction dir,
			    unsigned long attrs)
{
	int i;

	for (i = 0; i < nents; i++, sg++) {
		void *virt;
		unsigned long pfn;
		struct page *page = sg_page(sg);

		sg->dma_address = sg_phys(sg);
		pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE;
		page = pfn_to_page(pfn);
		if (PageHighMem(page)) {
			virt = kmap_atomic(page);
			consistent_sync(virt, sg->length, dir, FOR_CPU);
			kunmap_atomic(virt);
		} else {
			if (sg->offset > PAGE_SIZE)
				panic("sg->offset:%08x > PAGE_SIZE\n",
				      sg->offset);
			virt = page_address(page) + sg->offset;
			consistent_sync(virt, sg->length, dir, FOR_CPU);
		}
	}
	return nents;
}

static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			       int nhwentries, enum dma_data_direction dir,
			       unsigned long attrs)
{
}
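
/*
 * dma_sync_single_for_{cpu,device} back-ends: translate the bus address to a
 * kernel virtual address and maintain the caches in the given direction.
 */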
static void
nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
			      size_t size, enum dma_data_direction dir)
{
	consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU);
}

static void
nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
				 size_t size, enum dma_data_direction dir)
{
	consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE);
}
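
/*
 * dma_sync_sg_for_{cpu,device} back-ends: walk the scatterlist and sync each
 * entry through its kernel mapping.
 */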
static void
nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
			  int nents, enum dma_data_direction dir)
{
	int i;

	for (i = 0; i < nents; i++, sg++)
		consistent_sync(sg_virt(sg), sg->length, dir, FOR_CPU);
}

static void
nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
			     int nents, enum dma_data_direction dir)
{
	int i;

	for (i = 0; i < nents; i++, sg++)
		consistent_sync(sg_virt(sg), sg->length, dir, FOR_DEVICE);
}
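
/*
 * The DMA operation table for this port; the generic dma_map_*() helpers
 * dispatch through these entries.
 */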
struct dma_map_ops nds32_dma_ops = {
	.alloc = nds32_dma_alloc_coherent,
	.free = nds32_dma_free,
	.map_page = nds32_dma_map_page,
	.unmap_page = nds32_dma_unmap_page,
	.map_sg = nds32_dma_map_sg,
	.unmap_sg = nds32_dma_unmap_sg,
	.sync_single_for_device = nds32_dma_sync_single_for_device,
	.sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
	.sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
	.sync_sg_for_device = nds32_dma_sync_sg_for_device,
};

EXPORT_SYMBOL(nds32_dma_ops);