nds32: implement the unmap_sg DMA operation
arch/nds32/kernel/dma.c
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/dma-mapping.h>
#include <asm/proc-fns.h>

/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations.
 */
static pte_t *consistent_pte;
static DEFINE_RAW_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region  region;
 *    unsigned long     flags;
 *    struct page       **pages;
 *    unsigned int      nr_pages;
 *    unsigned long     phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *      .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
 *      .vm_start       = VMALLOC_START,
 *      .vm_end         = VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arch_vm_region {
        struct list_head vm_list;
        unsigned long vm_start;
        unsigned long vm_end;
        struct page *vm_pages;
};

static struct arch_vm_region consistent_head = {
        .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
        .vm_start = CONSISTENT_BASE,
        .vm_end = CONSISTENT_END,
};

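/*
 * Carve a free range of 'size' bytes out of the consistent region described
 * by 'head', using a first-fit walk of the existing regions.  Returns the
 * new region on success, or NULL if the allocation fails or no gap is large
 * enough.  The region list is protected by consistent_lock.
 */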
static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
                                              size_t size, gfp_t gfp)
{
        unsigned long addr = head->vm_start, end = head->vm_end - size;
        unsigned long flags;
        struct arch_vm_region *c, *new;

        new = kmalloc(sizeof(struct arch_vm_region), gfp);
        if (!new)
                goto out;

        raw_spin_lock_irqsave(&consistent_lock, flags);

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if ((addr + size) < addr)
                        goto nospc;
                if ((addr + size) <= c->vm_start)
                        goto found;
                addr = c->vm_end;
                if (addr > end)
                        goto nospc;
        }

found:
        /*
         * Insert this entry _before_ the one we found.
         */
        list_add_tail(&new->vm_list, &c->vm_list);
        new->vm_start = addr;
        new->vm_end = addr + size;

        raw_spin_unlock_irqrestore(&consistent_lock, flags);
        return new;

nospc:
        raw_spin_unlock_irqrestore(&consistent_lock, flags);
        kfree(new);
out:
        return NULL;
}

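/*
 * Look up the region whose start address matches 'addr'.  Returns NULL if
 * no such region exists.  Callers must hold consistent_lock.
 */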
static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
                                             unsigned long addr)
{
        struct arch_vm_region *c;

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if (c->vm_start == addr)
                        goto out;
        }
        c = NULL;
out:
        return c;
}

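/*
 * Allocate DMA-coherent memory: grab pages from the page allocator, clean
 * and invalidate them in the cacheable linear mapping, then remap them
 * uncached through the consistent region and hand back that virtual address,
 * with the physical address of the pages as the DMA handle.
 */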
/* FIXME: attrs is not used. */
static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
                                      dma_addr_t *handle, gfp_t gfp,
                                      unsigned long attrs)
{
        struct page *page;
        struct arch_vm_region *c;
        unsigned long order;
        u64 mask = ~0ULL, limit;
        pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

        if (!consistent_pte) {
                pr_err("%s: not initialized\n", __func__);
                dump_stack();
                return NULL;
        }

        if (dev) {
                mask = dev->coherent_dma_mask;

                /*
                 * Sanity check the DMA mask - it must be non-zero, and
                 * must be able to be satisfied by a DMA allocation.
                 */
                if (mask == 0) {
                        dev_warn(dev, "coherent DMA mask is unset\n");
                        goto no_page;
                }
        }

        /*
         * Sanity check the allocation size.
         */
        size = PAGE_ALIGN(size);
        limit = (mask + 1) & ~mask;
        if ((limit && size >= limit) ||
            size >= (CONSISTENT_END - CONSISTENT_BASE)) {
                pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
                        size, mask);
                goto no_page;
        }

        order = get_order(size);

        if (mask != 0xffffffff)
                gfp |= GFP_DMA;

        page = alloc_pages(gfp, order);
        if (!page)
                goto no_page;

        /*
         * Invalidate any data that might be lurking in the
         * kernel direct-mapped region for device DMA.
         */
        {
                unsigned long kaddr = (unsigned long)page_address(page);
                memset(page_address(page), 0, size);
                cpu_dma_wbinval_range(kaddr, kaddr + size);
        }

        /*
         * Allocate a virtual address in the consistent mapping region.
         */
        c = vm_region_alloc(&consistent_head, size,
                            gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
        if (c) {
                pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
                struct page *end = page + (1 << order);

                c->vm_pages = page;

                /*
                 * Set the "dma handle"
                 */
                *handle = page_to_phys(page);

                do {
                        BUG_ON(!pte_none(*pte));

                        /*
                         * x86 does not mark the pages reserved...
                         */
                        SetPageReserved(page);
                        set_pte(pte, mk_pte(page, prot));
                        page++;
                        pte++;
                } while (size -= PAGE_SIZE);

                /*
                 * Free the otherwise unused pages.
                 */
                while (page < end) {
                        __free_page(page);
                        page++;
                }

                return (void *)c->vm_start;
        }

        if (page)
                __free_pages(page, order);
no_page:
        *handle = ~0;
        return NULL;
}

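/*
 * Free a DMA-coherent allocation: tear down the uncached PTEs, release the
 * underlying pages back to the page allocator, flush the TLB for the range
 * and return the virtual range to the consistent region allocator.
 */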
static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
                           dma_addr_t handle, unsigned long attrs)
{
        struct arch_vm_region *c;
        unsigned long flags, addr;
        pte_t *ptep;

        size = PAGE_ALIGN(size);

        raw_spin_lock_irqsave(&consistent_lock, flags);

        c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
        if (!c)
                goto no_area;

        if ((c->vm_end - c->vm_start) != size) {
                pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
                       __func__, c->vm_end - c->vm_start, size);
                dump_stack();
                size = c->vm_end - c->vm_start;
        }

        ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
        addr = c->vm_start;
        do {
                pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
                unsigned long pfn;

                ptep++;
                addr += PAGE_SIZE;

                if (!pte_none(pte) && pte_present(pte)) {
                        pfn = pte_pfn(pte);

                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);

                                /*
                                 * x86 does not mark the pages reserved...
                                 */
                                ClearPageReserved(page);

                                __free_page(page);
                                continue;
                        }
                }

                pr_crit("%s: bad page in kernel page table\n", __func__);
        } while (size -= PAGE_SIZE);

        flush_tlb_kernel_range(c->vm_start, c->vm_end);

        list_del(&c->vm_list);

        raw_spin_unlock_irqrestore(&consistent_lock, flags);

        kfree(c);
        return;

no_area:
        raw_spin_unlock_irqrestore(&consistent_lock, flags);
        pr_err("%s: trying to free invalid coherent area: %p\n",
               __func__, cpu_addr);
        dump_stack();
}

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int ret = 0;

        do {
                pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
                pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
                if (!pmd) {
                        pr_err("%s: no pmd tables\n", __func__);
                        ret = -ENOMEM;
                        break;
                }
                /*
                 * The first-level mapping may already have been created
                 * elsewhere, so there is no need to warn here.
                 */
                /* WARN_ON(!pmd_none(*pmd)); */

                pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
                if (!pte) {
                        ret = -ENOMEM;
                        break;
                }

                consistent_pte = pte;
        } while (0);

        return ret;
}

core_initcall(consistent_init);

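/*
 * Apply the given cache maintenance routine to the physical range
 * [paddr, paddr + size).  Lowmem pages are reached through the linear
 * mapping; highmem pages are temporarily mapped with kmap_atomic() and
 * processed one page at a time.
 */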
static inline void cache_op(phys_addr_t paddr, size_t size,
                void (*fn)(unsigned long start, unsigned long end))
{
        struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
        unsigned offset = paddr & ~PAGE_MASK;
        size_t left = size;
        unsigned long start;

        do {
                size_t len = left;

                if (PageHighMem(page)) {
                        void *addr;

                        if (offset + len > PAGE_SIZE) {
                                if (offset >= PAGE_SIZE) {
                                        page += offset >> PAGE_SHIFT;
                                        offset &= ~PAGE_MASK;
                                }
                                len = PAGE_SIZE - offset;
                        }

                        addr = kmap_atomic(page);
                        start = (unsigned long)(addr + offset);
                        fn(start, start + len);
                        kunmap_atomic(addr);
                } else {
                        start = (unsigned long)phys_to_virt(paddr);
                        fn(start, start + size);
                }
                offset = 0;
                page++;
                left -= len;
        } while (left);
}

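/*
 * Make CPU-written data visible to the device: for DMA_TO_DEVICE and
 * DMA_BIDIRECTIONAL transfers the affected range is written back from the
 * cache; nothing needs to be done for DMA_FROM_DEVICE.
 */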
static void
nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
                                 size_t size, enum dma_data_direction dir)
{
        switch (dir) {
        case DMA_FROM_DEVICE:
                break;
        case DMA_TO_DEVICE:
        case DMA_BIDIRECTIONAL:
                cache_op(handle, size, cpu_dma_wb_range);
                break;
        default:
                BUG();
        }
}

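/*
 * Make device-written data visible to the CPU: for DMA_FROM_DEVICE and
 * DMA_BIDIRECTIONAL transfers any stale cache lines covering the range are
 * invalidated; nothing needs to be done for DMA_TO_DEVICE.
 */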
static void
nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
                              size_t size, enum dma_data_direction dir)
{
        switch (dir) {
        case DMA_TO_DEVICE:
                break;
        case DMA_FROM_DEVICE:
        case DMA_BIDIRECTIONAL:
                cache_op(handle, size, cpu_dma_inval_range);
                break;
        default:
                BUG();
        }
}

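/*
 * Map a single page for streaming DMA.  There is no IOMMU, so the DMA
 * address is simply the physical address of the page plus the offset; the
 * only work required is the cache maintenance for the device, which can be
 * skipped with DMA_ATTR_SKIP_CPU_SYNC.
 */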
static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        dma_addr_t dma_addr = page_to_phys(page) + offset;

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                nds32_dma_sync_single_for_device(dev, dma_addr, size, dir);
        return dma_addr;
}

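/*
 * Unmap a single page.  No mapping state needs to be torn down; only the
 * CPU-side cache maintenance is performed, unless the caller asked for it
 * to be skipped with DMA_ATTR_SKIP_CPU_SYNC.
 */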
static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
                                 size_t size, enum dma_data_direction dir,
                                 unsigned long attrs)
{
        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                nds32_dma_sync_single_for_cpu(dev, handle, size, dir);
}

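/*
 * Scatterlist variants of the sync operations: walk the list and apply the
 * single-entry sync to each segment.
 */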
static void
nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                             int nents, enum dma_data_direction dir)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                nds32_dma_sync_single_for_device(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
}

static void
nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
                          enum dma_data_direction dir)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                nds32_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
}

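/*
 * Map a scatterlist for streaming DMA.  With no IOMMU the DMA address of
 * each segment is its physical address, so mapping reduces to filling in
 * the DMA addresses and performing the cache maintenance for the device.
 */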
static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
                            int nents, enum dma_data_direction dir,
                            unsigned long attrs)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                /* Direct mapping: the DMA address is the physical address. */
                sg->dma_address = sg_phys(sg);
                nds32_dma_sync_single_for_device(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
        return nents;
}

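/*
 * Unmap a scatterlist: no mapping state needs to be torn down, so only the
 * CPU-side cache maintenance is performed for each segment.
 */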
static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
                               int nhwentries, enum dma_data_direction dir,
                               unsigned long attrs)
{
        int i;

        for (i = 0; i < nhwentries; i++, sg++) {
                nds32_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
                                sg->length, dir);
        }
}

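/*
 * The DMA mapping operations for the nds32 architecture; exported so that
 * modules performing DMA can reach them through the DMA mapping API.
 */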
struct dma_map_ops nds32_dma_ops = {
        .alloc = nds32_dma_alloc_coherent,
        .free = nds32_dma_free,
        .map_page = nds32_dma_map_page,
        .unmap_page = nds32_dma_unmap_page,
        .map_sg = nds32_dma_map_sg,
        .unmap_sg = nds32_dma_unmap_sg,
        .sync_single_for_device = nds32_dma_sync_single_for_device,
        .sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
        .sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
        .sync_sg_for_device = nds32_dma_sync_sg_for_device,
};

EXPORT_SYMBOL(nds32_dma_ops);