drivers/misc/habanalabs/mmu.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7
   8 #include "habanalabs.h"
   9 #include "include/hw_ip/mmu/mmu_general.h"
  10
  11 #include <linux/genalloc.h>
  12 #include <linux/slab.h>
  13
  14 static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
  15
  16 static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
  17 {
  18         struct pgt_info *pgt_info = NULL;
  19
  20         hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
  21                                 (unsigned long) hop_addr)
  22                 if (hop_addr == pgt_info->shadow_addr)
  23                         break;
  24
  25         return pgt_info;
  26 }
  27
  28 static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
  29 {
  30         struct hl_device *hdev = ctx->hdev;
  31         struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
  32
  33         gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
  34                         hdev->asic_prop.mmu_hop_table_size);
  35         hash_del(&pgt_info->node);
  36         kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
  37         kfree(pgt_info);
  38 }
  39
  40 static u64 alloc_hop(struct hl_ctx *ctx)
  41 {
  42         struct hl_device *hdev = ctx->hdev;
  43         struct asic_fixed_properties *prop = &hdev->asic_prop;
  44         struct pgt_info *pgt_info;
  45         u64 phys_addr, shadow_addr;
  46
  47         pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
  48         if (!pgt_info)
  49                 return ULLONG_MAX;
  50
  51         phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
  52                                         prop->mmu_hop_table_size);
  53         if (!phys_addr) {
  54                 dev_err(hdev->dev, "failed to allocate page\n");
  55                 goto pool_add_err;
  56         }
  57
  58         shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
  59                                                 GFP_KERNEL);
  60         if (!shadow_addr)
  61                 goto shadow_err;
  62
  63         pgt_info->phys_addr = phys_addr;
  64         pgt_info->shadow_addr = shadow_addr;
  65         pgt_info->ctx = ctx;
  66         pgt_info->num_of_ptes = 0;
  67         hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
  68
  69         return shadow_addr;
  70
  71 shadow_err:
  72         gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
  73 pool_add_err:
  74         kfree(pgt_info);
  75
  76         return ULLONG_MAX;
  77 }
  78
  79 static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
  80 {
  81         return ctx->hdev->asic_prop.mmu_pgt_addr +
  82                         (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
  83 }
  84
  85 static inline u64 get_hop0_addr(struct hl_ctx *ctx)
  86 {
  87         return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
  88                         (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
  89 }
  90
  91 static inline void flush(struct hl_ctx *ctx)
  92 {
  93         /* flush all writes from all cores to reach PCI */
  94         mb();
  95         ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
  96 }
  97
  98 /* transform the value to physical address when writing to H/W */
  99 static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
 100 {
 101         /*
 102          * The value to write is actually the address of the next shadow hop +
 103          * flags at the 12 LSBs.
 104          * Hence in order to get the value to write to the physical PTE, we
 105          * clear the 12 LSBs and translate the shadow hop to its associated
 106          * physical hop, and add back the original 12 LSBs.
 107          */
 108         u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
 109                                 (val & OFFSET_MASK);
 110
 111         ctx->hdev->asic_funcs->write_pte(ctx->hdev,
 112                                         get_phys_addr(ctx, shadow_pte_addr),
 113                                         phys_val);
 114
 115         *(u64 *) (uintptr_t) shadow_pte_addr = val;
 116 }
 117
 118 /* do not transform the value to physical address when writing to H/W */
 119 static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
 120                                         u64 val)
 121 {
 122         ctx->hdev->asic_funcs->write_pte(ctx->hdev,
 123                                         get_phys_addr(ctx, shadow_pte_addr),
 124                                         val);
 125         *(u64 *) (uintptr_t) shadow_pte_addr = val;
 126 }
 127
 128 /* clear the last and present bits */
 129 static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
 130 {
 131         /* no need to transform the value to physical address */
 132         write_final_pte(ctx, pte_addr, 0);
 133 }
 134
 135 static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
 136 {
 137         get_pgt_info(ctx, hop_addr)->num_of_ptes++;
 138 }
 139
 140 /*
 141  * put_pte - decrement the num of ptes and free the hop if possible
 142  *
 143  * @ctx: pointer to the context structure
 144  * @hop_addr: addr of the hop
 145  *
 146  * This function returns the number of ptes left on this hop. If the number is
 147  * 0, it means the pte was freed.
 148  */
 149 static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
 150 {
 151         struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
 152         int num_of_ptes_left;
 153
 154         pgt_info->num_of_ptes--;
 155
 156         /*
 157          * Need to save the number of ptes left because free_hop might free
 158          * the pgt_info
 159          */
 160         num_of_ptes_left = pgt_info->num_of_ptes;
 161         if (!num_of_ptes_left)
 162                 free_hop(ctx, hop_addr);
 163
 164         return num_of_ptes_left;
 165 }
 166
 167 static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
 168                                         u64 virt_addr, u64 mask, u64 shift)
 169 {
 170         return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
 171                         ((virt_addr & mask) >> shift);
 172 }
 173
 174 static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
 175 {
 176         return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
 177 }
 178
 179 static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
 180 {
 181         return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
 182 }
 183
 184 static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
 185 {
 186         return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
 187 }
 188
 189 static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
 190 {
 191         return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
 192 }
 193
 194 static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
 195 {
 196         return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
 197 }
 198
 199 static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
 200 {
 201         if (curr_pte & PAGE_PRESENT_MASK)
 202                 return curr_pte & PHYS_ADDR_MASK;
 203         else
 204                 return ULLONG_MAX;
 205 }
 206
 207 static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
 208                                                 bool *is_new_hop)
 209 {
 210         u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
 211
 212         if (hop_addr == ULLONG_MAX) {
 213                 hop_addr = alloc_hop(ctx);
 214                 *is_new_hop = (hop_addr != ULLONG_MAX);
 215         }
 216
 217         return hop_addr;
 218 }
 219
 220 /* translates shadow address inside hop to a physical address */
 221 static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
 222 {
 223         u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
 224         u64 shadow_hop_addr = shadow_addr & ~page_mask;
 225         u64 pte_offset = shadow_addr & page_mask;
 226         u64 phys_hop_addr;
 227
 228         if (shadow_hop_addr != get_hop0_addr(ctx))
 229                 phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
 230         else
 231                 phys_hop_addr = get_phys_hop0_addr(ctx);
 232
 233         return phys_hop_addr + pte_offset;
 234 }
 235
 236 static int dram_default_mapping_init(struct hl_ctx *ctx)
 237 {
 238         struct hl_device *hdev = ctx->hdev;
 239         struct asic_fixed_properties *prop = &hdev->asic_prop;
 240         u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
 241                 hop2_pte_addr, hop3_pte_addr, pte_val;
 242         int rc, i, j, hop3_allocated = 0;
 243
 244         if (!hdev->dram_supports_virtual_memory ||
 245                         !hdev->dram_default_page_mapping)
 246                 return 0;
 247
 248         num_of_hop3 = prop->dram_size_for_default_page_mapping;
 249         do_div(num_of_hop3, prop->dram_page_size);
 250         do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
 251
 252         /* add hop1 and hop2 */
 253         total_hops = num_of_hop3 + 2;
 254
 255         ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops,  GFP_KERNEL);
 256         if (!ctx->dram_default_hops)
 257                 return -ENOMEM;
 258
 259         hop0_addr = get_hop0_addr(ctx);
 260
 261         hop1_addr = alloc_hop(ctx);
 262         if (hop1_addr == ULLONG_MAX) {
 263                 dev_err(hdev->dev, "failed to alloc hop 1\n");
 264                 rc = -ENOMEM;
 265                 goto hop1_err;
 266         }
 267
 268         ctx->dram_default_hops[total_hops - 1] = hop1_addr;
 269
 270         hop2_addr = alloc_hop(ctx);
 271         if (hop2_addr == ULLONG_MAX) {
 272                 dev_err(hdev->dev, "failed to alloc hop 2\n");
 273                 rc = -ENOMEM;
 274                 goto hop2_err;
 275         }
 276
 277         ctx->dram_default_hops[total_hops - 2] = hop2_addr;
 278
 279         for (i = 0 ; i < num_of_hop3 ; i++) {
 280                 ctx->dram_default_hops[i] = alloc_hop(ctx);
 281                 if (ctx->dram_default_hops[i] == ULLONG_MAX) {
 282                         dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
 283                         rc = -ENOMEM;
 284                         goto hop3_err;
 285                 }
 286                 hop3_allocated++;
 287         }
 288
 289         /* need only pte 0 in hops 0 and 1 */
 290         pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 291         write_pte(ctx, hop0_addr, pte_val);
 292
 293         pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 294         write_pte(ctx, hop1_addr, pte_val);
 295         get_pte(ctx, hop1_addr);
 296
 297         hop2_pte_addr = hop2_addr;
 298         for (i = 0 ; i < num_of_hop3 ; i++) {
 299                 pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
 300                                 PAGE_PRESENT_MASK;
 301                 write_pte(ctx, hop2_pte_addr, pte_val);
 302                 get_pte(ctx, hop2_addr);
 303                 hop2_pte_addr += HL_PTE_SIZE;
 304         }
 305
 306         pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) |
 307                         LAST_MASK | PAGE_PRESENT_MASK;
 308
 309         for (i = 0 ; i < num_of_hop3 ; i++) {
 310                 hop3_pte_addr = ctx->dram_default_hops[i];
 311                 for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
 312                         write_final_pte(ctx, hop3_pte_addr, pte_val);
 313                         get_pte(ctx, ctx->dram_default_hops[i]);
 314                         hop3_pte_addr += HL_PTE_SIZE;
 315                 }
 316         }
 317
 318         flush(ctx);
 319
 320         return 0;
 321
 322 hop3_err:
 323         for (i = 0 ; i < hop3_allocated ; i++)
 324                 free_hop(ctx, ctx->dram_default_hops[i]);
 325
 326         free_hop(ctx, hop2_addr);
 327 hop2_err:
 328         free_hop(ctx, hop1_addr);
 329 hop1_err:
 330         kfree(ctx->dram_default_hops);
 331
 332         return rc;
 333 }
 334
 335 static void dram_default_mapping_fini(struct hl_ctx *ctx)
 336 {
 337         struct hl_device *hdev = ctx->hdev;
 338         struct asic_fixed_properties *prop = &hdev->asic_prop;
 339         u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
 340                 hop2_pte_addr, hop3_pte_addr;
 341         int i, j;
 342
 343         if (!hdev->dram_supports_virtual_memory ||
 344                         !hdev->dram_default_page_mapping)
 345                 return;
 346
 347         num_of_hop3 = prop->dram_size_for_default_page_mapping;
 348         do_div(num_of_hop3, prop->dram_page_size);
 349         do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
 350
 351         hop0_addr = get_hop0_addr(ctx);
 352         /* add hop1 and hop2 */
 353         total_hops = num_of_hop3 + 2;
 354         hop1_addr = ctx->dram_default_hops[total_hops - 1];
 355         hop2_addr = ctx->dram_default_hops[total_hops - 2];
 356
 357         for (i = 0 ; i < num_of_hop3 ; i++) {
 358                 hop3_pte_addr = ctx->dram_default_hops[i];
 359                 for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
 360                         clear_pte(ctx, hop3_pte_addr);
 361                         put_pte(ctx, ctx->dram_default_hops[i]);
 362                         hop3_pte_addr += HL_PTE_SIZE;
 363                 }
 364         }
 365
 366         hop2_pte_addr = hop2_addr;
 367         hop2_pte_addr = hop2_addr;
 368         for (i = 0 ; i < num_of_hop3 ; i++) {
 369                 clear_pte(ctx, hop2_pte_addr);
 370                 put_pte(ctx, hop2_addr);
 371                 hop2_pte_addr += HL_PTE_SIZE;
 372         }
 373
 374         clear_pte(ctx, hop1_addr);
 375         put_pte(ctx, hop1_addr);
 376         clear_pte(ctx, hop0_addr);
 377
 378         kfree(ctx->dram_default_hops);
 379
 380         flush(ctx);
 381 }
 382
 383 /**
 384  * hl_mmu_init() - initialize the MMU module.
 385  * @hdev: habanalabs device structure.
 386  *
 387  * This function does the following:
 388  * - Allocate max_asid zeroed hop0 pgts so no mapping is available.
 389  * - Enable MMU in H/W.
 390  * - Invalidate the MMU cache.
 391  * - Create a pool of pages for pgt_infos.
 392  *
 393  * This function depends on DMA QMAN to be working!
 394  *
 395  * Return: 0 for success, non-zero for failure.
 396  */
 397 int hl_mmu_init(struct hl_device *hdev)
 398 {
 399         struct asic_fixed_properties *prop = &hdev->asic_prop;
 400         int rc;
 401
 402         if (!hdev->mmu_enable)
 403                 return 0;
 404
 405         /* MMU H/W init was already done in device hw_init() */
 406
 407         hdev->mmu_pgt_pool =
 408                         gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
 409
 410         if (!hdev->mmu_pgt_pool) {
 411                 dev_err(hdev->dev, "Failed to create page gen pool\n");
 412                 return -ENOMEM;
 413         }
 414
 415         rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
 416                         prop->mmu_hop0_tables_total_size,
 417                         prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
 418                         -1);
 419         if (rc) {
 420                 dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
 421                 goto err_pool_add;
 422         }
 423
 424         hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
 425                                         prop->mmu_hop_table_size,
 426                                         GFP_KERNEL | __GFP_ZERO);
 427         if (!hdev->mmu_shadow_hop0) {
 428                 rc = -ENOMEM;
 429                 goto err_pool_add;
 430         }
 431
 432         return 0;
 433
 434 err_pool_add:
 435         gen_pool_destroy(hdev->mmu_pgt_pool);
 436
 437         return rc;
 438 }
 439
 440 /**
 441  * hl_mmu_fini() - release the MMU module.
 442  * @hdev: habanalabs device structure.
 443  *
 444  * This function does the following:
 445  * - Disable MMU in H/W.
 446  * - Free the pgt_infos pool.
 447  *
 448  * All contexts should be freed before calling this function.
 449  */
 450 void hl_mmu_fini(struct hl_device *hdev)
 451 {
 452         if (!hdev->mmu_enable)
 453                 return;
 454
 455         kvfree(hdev->mmu_shadow_hop0);
 456         gen_pool_destroy(hdev->mmu_pgt_pool);
 457
 458         /* MMU H/W fini will be done in device hw_fini() */
 459 }
 460
 461 /**
 462  * hl_mmu_ctx_init() - initialize a context for using the MMU module.
 463  * @ctx: pointer to the context structure to initialize.
 464  *
 465  * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
 466  * page tables hops related to this context.
 467  * Return: 0 on success, non-zero otherwise.
 468  */
 469 int hl_mmu_ctx_init(struct hl_ctx *ctx)
 470 {
 471         struct hl_device *hdev = ctx->hdev;
 472
 473         if (!hdev->mmu_enable)
 474                 return 0;
 475
 476         mutex_init(&ctx->mmu_lock);
 477         hash_init(ctx->mmu_phys_hash);
 478         hash_init(ctx->mmu_shadow_hash);
 479
 480         return dram_default_mapping_init(ctx);
 481 }
 482
 483 /*
 484  * hl_mmu_ctx_fini - disable a ctx from using the mmu module
 485  *
 486  * @ctx: pointer to the context structure
 487  *
 488  * This function does the following:
 489  * - Free any pgts which were not freed yet
 490  * - Free the mutex
 491  * - Free DRAM default page mapping hops
 492  */
 493 void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 494 {
 495         struct hl_device *hdev = ctx->hdev;
 496         struct pgt_info *pgt_info;
 497         struct hlist_node *tmp;
 498         int i;
 499
 500         if (!hdev->mmu_enable)
 501                 return;
 502
 503         dram_default_mapping_fini(ctx);
 504
 505         if (!hash_empty(ctx->mmu_shadow_hash))
 506                 dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");
 507
 508         hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
 509                 dev_err(hdev->dev,
 510                         "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
 511                         pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
 512                 free_hop(ctx, pgt_info->shadow_addr);
 513         }
 514
 515         mutex_destroy(&ctx->mmu_lock);
 516 }
 517
 518 static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 519 {
 520         struct hl_device *hdev = ctx->hdev;
 521         struct asic_fixed_properties *prop = &hdev->asic_prop;
 522         u64 hop0_addr = 0, hop0_pte_addr = 0,
 523                 hop1_addr = 0, hop1_pte_addr = 0,
 524                 hop2_addr = 0, hop2_pte_addr = 0,
 525                 hop3_addr = 0, hop3_pte_addr = 0,
 526                 hop4_addr = 0, hop4_pte_addr = 0,
 527                 curr_pte;
 528         bool is_dram_addr, is_huge, clear_hop3 = true;
 529
 530         is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
 531                                 prop->va_space_dram_start_address,
 532                                 prop->va_space_dram_end_address);
 533
 534         hop0_addr = get_hop0_addr(ctx);
 535         hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
 536
 537         curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 538
 539         hop1_addr = get_next_hop_addr(ctx, curr_pte);
 540
 541         if (hop1_addr == ULLONG_MAX)
 542                 goto not_mapped;
 543
 544         hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
 545
 546         curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 547
 548         hop2_addr = get_next_hop_addr(ctx, curr_pte);
 549
 550         if (hop2_addr == ULLONG_MAX)
 551                 goto not_mapped;
 552
 553         hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
 554
 555         curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 556
 557         hop3_addr = get_next_hop_addr(ctx, curr_pte);
 558
 559         if (hop3_addr == ULLONG_MAX)
 560                 goto not_mapped;
 561
 562         hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
 563
 564         curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
 565
 566         is_huge = curr_pte & LAST_MASK;
 567
 568         if (is_dram_addr && !is_huge) {
 569                 dev_err(hdev->dev,
 570                                 "DRAM unmapping should use huge pages only\n");
 571                 return -EFAULT;
 572         }
 573
 574         if (!is_huge) {
 575                 hop4_addr = get_next_hop_addr(ctx, curr_pte);
 576
 577                 if (hop4_addr == ULLONG_MAX)
 578                         goto not_mapped;
 579
 580                 hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
 581
 582                 curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
 583
 584                 clear_hop3 = false;
 585         }
 586
 587         if (hdev->dram_default_page_mapping && is_dram_addr) {
 588                 u64 default_pte = (prop->mmu_dram_default_page_addr &
 589                                 PTE_PHYS_ADDR_MASK) | LAST_MASK |
 590                                         PAGE_PRESENT_MASK;
 591                 if (curr_pte == default_pte) {
 592                         dev_err(hdev->dev,
 593                                 "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
 594                                         virt_addr);
 595                         goto not_mapped;
 596                 }
 597
 598                 if (!(curr_pte & PAGE_PRESENT_MASK)) {
 599                         dev_err(hdev->dev,
 600                                 "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
 601                                         virt_addr);
 602                         goto not_mapped;
 603                 }
 604
 605                 write_final_pte(ctx, hop3_pte_addr, default_pte);
 606                 put_pte(ctx, hop3_addr);
 607         } else {
 608                 if (!(curr_pte & PAGE_PRESENT_MASK))
 609                         goto not_mapped;
 610
 611                 if (hop4_addr)
 612                         clear_pte(ctx, hop4_pte_addr);
 613                 else
 614                         clear_pte(ctx, hop3_pte_addr);
 615
 616                 if (hop4_addr && !put_pte(ctx, hop4_addr))
 617                         clear_hop3 = true;
 618
 619                 if (!clear_hop3)
 620                         goto flush;
 621
 622                 clear_pte(ctx, hop3_pte_addr);
 623
 624                 if (put_pte(ctx, hop3_addr))
 625                         goto flush;
 626
 627                 clear_pte(ctx, hop2_pte_addr);
 628
 629                 if (put_pte(ctx, hop2_addr))
 630                         goto flush;
 631
 632                 clear_pte(ctx, hop1_pte_addr);
 633
 634                 if (put_pte(ctx, hop1_addr))
 635                         goto flush;
 636
 637                 clear_pte(ctx, hop0_pte_addr);
 638         }
 639
 640 flush:
 641         flush(ctx);
 642
 643         return 0;
 644
 645 not_mapped:
 646         dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
 647                 virt_addr);
 648
 649         return -EINVAL;
 650 }
 651
 652 /*
 653  * hl_mmu_unmap - unmaps a virtual addr
 654  *
 655  * @ctx: pointer to the context structure
 656  * @virt_addr: virt addr to map from
 657  * @page_size: size of the page to unmap
 658  *
 659  * This function does the following:
 660  * - Check that the virt addr is mapped
 661  * - Unmap the virt addr and frees pgts if possible
 662  * - Returns 0 on success, -EINVAL if the given addr is not mapped
 663  *
 664  * Because this function changes the page tables in the device and because it
 665  * changes the MMU hash, it must be protected by a lock.
 666  * However, because it maps only a single page, the lock should be implemented
 667  * in a higher level in order to protect the entire mapping of the memory area
 668  */
 669 int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 670 {
 671         struct hl_device *hdev = ctx->hdev;
 672         u64 real_virt_addr;
 673         u32 real_page_size, npages;
 674         int i, rc;
 675
 676         if (!hdev->mmu_enable)
 677                 return 0;
 678
 679         /*
 680          * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
 681          * is bigger, we break it to sub-pages and unmap them separately.
 682          */
 683         if ((page_size % PAGE_SIZE_2MB) == 0) {
 684                 real_page_size = PAGE_SIZE_2MB;
 685         } else if ((page_size % PAGE_SIZE_4KB) == 0) {
 686                 real_page_size = PAGE_SIZE_4KB;
 687         } else {
 688                 dev_err(hdev->dev,
 689                         "page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
 690                                 page_size);
 691
 692                 return -EFAULT;
 693         }
 694
 695         npages = page_size / real_page_size;
 696         real_virt_addr = virt_addr;
 697
 698         for (i = 0 ; i < npages ; i++) {
 699                 rc = _hl_mmu_unmap(ctx, real_virt_addr);
 700                 if (rc)
 701                         return rc;
 702
 703                 real_virt_addr += real_page_size;
 704         }
 705
 706         return 0;
 707 }
 708
 709 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 710                 u32 page_size)
 711 {
 712         struct hl_device *hdev = ctx->hdev;
 713         struct asic_fixed_properties *prop = &hdev->asic_prop;
 714         u64 hop0_addr = 0, hop0_pte_addr = 0,
 715                 hop1_addr = 0, hop1_pte_addr = 0,
 716                 hop2_addr = 0, hop2_pte_addr = 0,
 717                 hop3_addr = 0, hop3_pte_addr = 0,
 718                 hop4_addr = 0, hop4_pte_addr = 0,
 719                 curr_pte = 0;
 720         bool hop1_new = false, hop2_new = false, hop3_new = false,
 721                 hop4_new = false, is_huge, is_dram_addr;
 722         int rc = -ENOMEM;
 723
 724         /*
 725          * This mapping function can map a 4KB/2MB page. For 2MB page there are
 726          * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
 727          * pages only but user memory could have been allocated with one of the
 728          * two page sizes. Since this is a common code for all the three cases,
 729          * we need this hugs page check.
 730          */
 731         is_huge = page_size == PAGE_SIZE_2MB;
 732
 733         is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
 734                                 prop->va_space_dram_start_address,
 735                                 prop->va_space_dram_end_address);
 736
 737         if (is_dram_addr && !is_huge) {
 738                 dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
 739                 return -EFAULT;
 740         }
 741
 742         hop0_addr = get_hop0_addr(ctx);
 743         hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
 744         curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 745
 746         hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
 747         if (hop1_addr == ULLONG_MAX)
 748                 goto err;
 749
 750         hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
 751         curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 752
 753         hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
 754         if (hop2_addr == ULLONG_MAX)
 755                 goto err;
 756
 757         hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
 758         curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 759
 760         hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
 761         if (hop3_addr == ULLONG_MAX)
 762                 goto err;
 763
 764         hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
 765         curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
 766
 767         if (!is_huge) {
 768                 hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
 769                 if (hop4_addr == ULLONG_MAX)
 770                         goto err;
 771
 772                 hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
 773                 curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
 774         }
 775
 776         if (hdev->dram_default_page_mapping && is_dram_addr) {
 777                 u64 default_pte = (prop->mmu_dram_default_page_addr &
 778                                         PTE_PHYS_ADDR_MASK) | LAST_MASK |
 779                                                 PAGE_PRESENT_MASK;
 780
 781                 if (curr_pte != default_pte) {
 782                         dev_err(hdev->dev,
 783                                 "DRAM: mapping already exists for virt_addr 0x%llx\n",
 784                                         virt_addr);
 785                         rc = -EINVAL;
 786                         goto err;
 787                 }
 788
 789                 if (hop1_new || hop2_new || hop3_new || hop4_new) {
 790                         dev_err(hdev->dev,
 791                                 "DRAM mapping should not allocate more hops\n");
 792                         rc = -EFAULT;
 793                         goto err;
 794                 }
 795         } else if (curr_pte & PAGE_PRESENT_MASK) {
 796                 dev_err(hdev->dev,
 797                         "mapping already exists for virt_addr 0x%llx\n",
 798                                 virt_addr);
 799
 800                 dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
 801                         *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
 802                 dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
 803                         *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
 804                 dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
 805                         *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
 806                 dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
 807                         *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
 808
 809                 if (!is_huge)
 810                         dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
 811                                 *(u64 *) (uintptr_t) hop4_pte_addr,
 812                                 hop4_pte_addr);
 813
 814                 rc = -EINVAL;
 815                 goto err;
 816         }
 817
 818         curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
 819                         | PAGE_PRESENT_MASK;
 820
 821         if (is_huge)
 822                 write_final_pte(ctx, hop3_pte_addr, curr_pte);
 823         else
 824                 write_final_pte(ctx, hop4_pte_addr, curr_pte);
 825
 826         if (hop1_new) {
 827                 curr_pte =
 828                         (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 829                 write_pte(ctx, hop0_pte_addr, curr_pte);
 830         }
 831         if (hop2_new) {
 832                 curr_pte =
 833                         (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 834                 write_pte(ctx, hop1_pte_addr, curr_pte);
 835                 get_pte(ctx, hop1_addr);
 836         }
 837         if (hop3_new) {
 838                 curr_pte =
 839                         (hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 840                 write_pte(ctx, hop2_pte_addr, curr_pte);
 841                 get_pte(ctx, hop2_addr);
 842         }
 843
 844         if (!is_huge) {
 845                 if (hop4_new) {
 846                         curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
 847                                         PAGE_PRESENT_MASK;
 848                         write_pte(ctx, hop3_pte_addr, curr_pte);
 849                         get_pte(ctx, hop3_addr);
 850                 }
 851
 852                 get_pte(ctx, hop4_addr);
 853         } else {
 854                 get_pte(ctx, hop3_addr);
 855         }
 856
 857         flush(ctx);
 858
 859         return 0;
 860
 861 err:
 862         if (hop4_new)
 863                 free_hop(ctx, hop4_addr);
 864         if (hop3_new)
 865                 free_hop(ctx, hop3_addr);
 866         if (hop2_new)
 867                 free_hop(ctx, hop2_addr);
 868         if (hop1_new)
 869                 free_hop(ctx, hop1_addr);
 870
 871         return rc;
 872 }
 873
 874 /*
 875  * hl_mmu_map - maps a virtual addr to physical addr
 876  *
 877  * @ctx: pointer to the context structure
 878  * @virt_addr: virt addr to map from
 879  * @phys_addr: phys addr to map to
 880  * @page_size: physical page size
 881  *
 882  * This function does the following:
 883  * - Check that the virt addr is not mapped
 884  * - Allocate pgts as necessary in order to map the virt addr to the phys
 885  * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
 886  *
 887  * Because this function changes the page tables in the device and because it
 888  * changes the MMU hash, it must be protected by a lock.
 889  * However, because it maps only a single page, the lock should be implemented
 890  * in a higher level in order to protect the entire mapping of the memory area
 891  */
 892 int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 893 {
 894         struct hl_device *hdev = ctx->hdev;
 895         u64 real_virt_addr, real_phys_addr;
 896         u32 real_page_size, npages;
 897         int i, rc, mapped_cnt = 0;
 898
 899         if (!hdev->mmu_enable)
 900                 return 0;
 901
 902         /*
 903          * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
 904          * is bigger, we break it to sub-pages and map them separately.
 905          */
 906         if ((page_size % PAGE_SIZE_2MB) == 0) {
 907                 real_page_size = PAGE_SIZE_2MB;
 908         } else if ((page_size % PAGE_SIZE_4KB) == 0) {
 909                 real_page_size = PAGE_SIZE_4KB;
 910         } else {
 911                 dev_err(hdev->dev,
 912                         "page size of %u is not 4KB nor 2MB aligned, can't map\n",
 913                                 page_size);
 914
 915                 return -EFAULT;
 916         }
 917
 918         npages = page_size / real_page_size;
 919         real_virt_addr = virt_addr;
 920         real_phys_addr = phys_addr;
 921
 922         for (i = 0 ; i < npages ; i++) {
 923                 rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
 924                                 real_page_size);
 925                 if (rc)
 926                         goto err;
 927
 928                 real_virt_addr += real_page_size;
 929                 real_phys_addr += real_page_size;
 930                 mapped_cnt++;
 931         }
 932
 933         return 0;
 934
 935 err:
 936         real_virt_addr = virt_addr;
 937         for (i = 0 ; i < mapped_cnt ; i++) {
 938                 if (_hl_mmu_unmap(ctx, real_virt_addr))
 939                         dev_warn_ratelimited(hdev->dev,
 940                                 "failed to unmap va: 0x%llx\n", real_virt_addr);
 941
 942                 real_virt_addr += real_page_size;
 943         }
 944
 945         return rc;
 946 }
 947
 948 /*
 949  * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
 950  *
 951  * @ctx: pointer to the context structure
 952  *
 953  */
 954 void hl_mmu_swap_out(struct hl_ctx *ctx)
 955 {
 956
 957 }
 958
 959 /*
 960  * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
 961  *
 962  * @ctx: pointer to the context structure
 963  *
 964  */
 965 void hl_mmu_swap_in(struct hl_ctx *ctx)
 966 {
 967
 968 }