asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge branch 'for-joerg/arm-smmu/updates' of git://git.kernel.org/pub/scm/linux/kerne...
author: Joerg Roedel <jroedel@suse.de>
Mon, 1 Jul 2019 11:44:41 +0000 (13:44 +0200)
committer: Joerg Roedel <jroedel@suse.de>
Mon, 1 Jul 2019 11:44:41 +0000 (13:44 +0200)
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/ipmmu-vmsa.c
include/linux/io-pgtable.h
include/linux/iommu.h

index 4d5a694f02c2bb3159d2e7119a474e35fc384a96..8ff8f61d9e1cb646b0e70efa5c3bd6806f4dc963 100644 (file)
 #define Q_BASE_RWA                     (1UL << 62)
 #define Q_BASE_ADDR_MASK               GENMASK_ULL(51, 5)
 #define Q_BASE_LOG2SIZE                        GENMASK(4, 0)
+#define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
 
 /*
  * Stream table.
                                        FIELD_GET(ARM64_TCR_##fld, tcr))
 
 /* Command queue */
-#define CMDQ_ENT_DWORDS                        2
-#define CMDQ_MAX_SZ_SHIFT              8
+#define CMDQ_ENT_SZ_SHIFT              4
+#define CMDQ_ENT_DWORDS                        ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
+#define CMDQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
 
 #define CMDQ_CONS_ERR                  GENMASK(30, 24)
 #define CMDQ_ERR_CERROR_NONE_IDX       0
 #define CMDQ_SYNC_1_MSIADDR_MASK       GENMASK_ULL(51, 2)
 
 /* Event queue */
-#define EVTQ_ENT_DWORDS                        4
-#define EVTQ_MAX_SZ_SHIFT              7
+#define EVTQ_ENT_SZ_SHIFT              5
+#define EVTQ_ENT_DWORDS                        ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
+#define EVTQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
 
 #define EVTQ_0_ID                      GENMASK_ULL(7, 0)
 
 /* PRI queue */
-#define PRIQ_ENT_DWORDS                        2
-#define PRIQ_MAX_SZ_SHIFT              8
+#define PRIQ_ENT_SZ_SHIFT              4
+#define PRIQ_ENT_DWORDS                        ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
+#define PRIQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
 
 #define PRIQ_0_SID                     GENMASK_ULL(31, 0)
 #define PRIQ_0_SSID                    GENMASK_ULL(51, 32)
@@ -798,7 +802,7 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 /* High-level queue accessors */
 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 {
-       memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
+       memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
        cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 
        switch (ent->opcode) {
@@ -1785,13 +1789,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
+               .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
                .tlb            = &arm_smmu_gather_ops,
                .iommu_dev      = smmu->dev,
        };
 
-       if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
-               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
-
        if (smmu_domain->non_strict)
                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
@@ -2270,17 +2272,32 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
                                   struct arm_smmu_queue *q,
                                   unsigned long prod_off,
                                   unsigned long cons_off,
-                                  size_t dwords)
+                                  size_t dwords, const char *name)
 {
-       size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
+       size_t qsz;
+
+       do {
+               qsz = ((1 << q->max_n_shift) * dwords) << 3;
+               q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
+                                             GFP_KERNEL);
+               if (q->base || qsz < PAGE_SIZE)
+                       break;
+
+               q->max_n_shift--;
+       } while (1);
 
-       q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
        if (!q->base) {
-               dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
-                       qsz);
+               dev_err(smmu->dev,
+                       "failed to allocate queue (0x%zx bytes) for %s\n",
+                       qsz, name);
                return -ENOMEM;
        }
 
+       if (!WARN_ON(q->base_dma & (qsz - 1))) {
+               dev_info(smmu->dev, "allocated %u entries for %s\n",
+                        1 << q->max_n_shift, name);
+       }
+
        q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
        q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
        q->ent_dwords   = dwords;
@@ -2300,13 +2317,15 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
        /* cmdq */
        spin_lock_init(&smmu->cmdq.lock);
        ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
-                                     ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
+                                     ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
+                                     "cmdq");
        if (ret)
                return ret;
 
        /* evtq */
        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
-                                     ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
+                                     ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
+                                     "evtq");
        if (ret)
                return ret;
 
@@ -2315,7 +2334,8 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
                return 0;
 
        return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
-                                      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
+                                      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
+                                      "priq");
 }
 
 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -2879,7 +2899,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
                return -ENXIO;
        }
 
-       /* Queue sizes, capped at 4k */
+       /* Queue sizes, capped to ensure natural alignment */
        smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
                                         FIELD_GET(IDR1_CMDQS, reg));
        if (!smmu->cmdq.q.max_n_shift) {
index 586dd5a46d9f593b33a9a46d7e57fbe4dbebc5ff..653b6b3dcafb0f09387653b898472694901be3a0 100644 (file)
@@ -892,13 +892,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
+               .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
                .tlb            = smmu_domain->tlb_ops,
                .iommu_dev      = smmu->dev,
        };
 
-       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
-               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
-
        if (smmu_domain->non_strict)
                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
index aa7a3fa6dd09df2cfe3783c96be04d4c131a0629..0fc8dfab2abf57d51418ba17f5d470eb6292b49a 100644 (file)
@@ -204,7 +204,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
                dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
                goto out_free;
        }
-       if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
+       if (table && !cfg->coherent_walk) {
                dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
@@ -238,7 +238,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
        struct device *dev = cfg->iommu_dev;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);
 
-       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+       if (!cfg->coherent_walk)
                dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
                                 DMA_TO_DEVICE);
        if (lvl == 1)
@@ -250,7 +250,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
                               struct io_pgtable_cfg *cfg)
 {
-       if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
+       if (cfg->coherent_walk)
                return;
 
        dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
@@ -716,7 +716,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
                            IO_PGTABLE_QUIRK_NO_PERMS |
                            IO_PGTABLE_QUIRK_TLBI_ON_MAP |
                            IO_PGTABLE_QUIRK_ARM_MTK_4GB |
-                           IO_PGTABLE_QUIRK_NO_DMA |
                            IO_PGTABLE_QUIRK_NON_STRICT))
                return NULL;
 
@@ -779,8 +778,11 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
        /* TTBRs */
        cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
                                   ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
-                                  ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
-                                  ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA);
+                                  (cfg->coherent_walk ?
+                                  (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
+                                   ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
+                                  (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
+                                   ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
        cfg->arm_v7s_cfg.ttbr[1] = 0;
        return &data->iop;
 
@@ -835,7 +837,8 @@ static int __init arm_v7s_do_selftests(void)
                .tlb = &dummy_tlb_ops,
                .oas = 32,
                .ias = 32,
-               .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
+               .coherent_walk = true,
+               .quirks = IO_PGTABLE_QUIRK_ARM_NS,
                .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
        };
        unsigned int iova, size, iova_start;
index 4b6b2f3150a9d21ed6465631bc973c54d51790bc..161a7d56264d0c26b7297930c0d843df674dd7bc 100644 (file)
 #define ARM_LPAE_MAIR_ATTR_MASK                0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE      0x04
 #define ARM_LPAE_MAIR_ATTR_NC          0x44
+#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA  0xf4
 #define ARM_LPAE_MAIR_ATTR_WBRWA       0xff
 #define ARM_LPAE_MAIR_ATTR_IDX_NC      0
 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE   1
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV     2
+#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE      3
 
 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
 #define ARM_MALI_LPAE_TTBR_READ_INNER  BIT(2)
@@ -239,7 +241,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
                return NULL;
 
        pages = page_address(p);
-       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
+       if (!cfg->coherent_walk) {
                dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
@@ -265,7 +267,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 static void __arm_lpae_free_pages(void *pages, size_t size,
                                  struct io_pgtable_cfg *cfg)
 {
-       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+       if (!cfg->coherent_walk)
                dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
                                 size, DMA_TO_DEVICE);
        free_pages((unsigned long)pages, get_order(size));
@@ -283,7 +285,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 {
        *ptep = pte;
 
-       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+       if (!cfg->coherent_walk)
                __arm_lpae_sync_pte(ptep, cfg);
 }
 
@@ -361,8 +363,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 
        old = cmpxchg64_relaxed(ptep, curr, new);
 
-       if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) ||
-           (old & ARM_LPAE_PTE_SW_SYNC))
+       if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
                return old;
 
        /* Even if it's not ours, there's no point waiting; just kick it */
@@ -403,8 +404,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
                if (pte)
                        __arm_lpae_free_pages(cptep, tblsz, cfg);
-       } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) &&
-                  !(pte & ARM_LPAE_PTE_SW_SYNC)) {
+       } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
                __arm_lpae_sync_pte(ptep, cfg);
        }
 
@@ -459,6 +459,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
                else if (prot & IOMMU_CACHE)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
+               else if (prot & IOMMU_QCOM_SYS_CACHE)
+                       pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
+                               << ARM_LPAE_PTE_ATTRINDX_SHIFT);
        }
 
        if (prot & IOMMU_NOEXEC)
@@ -783,7 +786,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
        u64 reg;
        struct arm_lpae_io_pgtable *data;
 
-       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
+       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NON_STRICT))
                return NULL;
 
@@ -792,9 +795,15 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
                return NULL;
 
        /* TCR */
-       reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
-             (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
-             (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+       if (cfg->coherent_walk) {
+               reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
+                     (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
+                     (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+       } else {
+               reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) |
+                     (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
+                     (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
+       }
 
        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
@@ -846,7 +855,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
              (ARM_LPAE_MAIR_ATTR_WBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
              (ARM_LPAE_MAIR_ATTR_DEVICE
-              << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+              << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
+             (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
+              << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
 
        cfg->arm_lpae_s1_cfg.mair[0] = reg;
        cfg->arm_lpae_s1_cfg.mair[1] = 0;
@@ -876,8 +887,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
        struct arm_lpae_io_pgtable *data;
 
        /* The NS quirk doesn't apply at stage 2 */
-       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
-                           IO_PGTABLE_QUIRK_NON_STRICT))
+       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
                return NULL;
 
        data = arm_lpae_alloc_pgtable(cfg);
@@ -1212,7 +1222,7 @@ static int __init arm_lpae_do_selftests(void)
        struct io_pgtable_cfg cfg = {
                .tlb = &dummy_tlb_ops,
                .oas = 48,
-               .quirks = IO_PGTABLE_QUIRK_NO_DMA,
+               .coherent_walk = true,
        };
 
        for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
index 9a380c10655e182d35d9f6c170336aeaa05030f6..12bcb95bdaa83fc486e50bb280daf6b844a11eef 100644 (file)
@@ -431,6 +431,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
         * TODO: Add support for coherent walk through CCI with DVM and remove
         * cache handling. For now, delegate it to the io-pgtable code.
         */
+       domain->cfg.coherent_walk = false;
        domain->cfg.iommu_dev = domain->mmu->root->dev;
 
        /*
index 76969a56483142a374469aca826fffa18a23c304..b5a450a3bb47a6f25beafa9510902af070f14dbd 100644 (file)
@@ -44,6 +44,8 @@ struct iommu_gather_ops {
  *                 tables.
  * @ias:           Input address (iova) size, in bits.
  * @oas:           Output address (paddr) size, in bits.
+ * @coherent_walk: A flag to indicate whether or not page table walks made
+ *                 by the IOMMU are coherent with the CPU caches.
  * @tlb:           TLB management callbacks for this set of tables.
  * @iommu_dev:     The device representing the DMA configuration for the
  *                 page table walker.
@@ -68,11 +70,6 @@ struct io_pgtable_cfg {
         *      when the SoC is in "4GB mode" and they can only access the high
         *      remap of DRAM (0x1_00000000 to 0x1_ffffffff).
         *
-        * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
-        *      be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
-        *      software-emulated IOMMU), such that pagetable updates need not
-        *      be treated as explicit DMA data.
-        *
         * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
         *      on unmap, for DMA domains using the flush queue mechanism for
         *      delayed invalidation.
@@ -81,12 +78,12 @@ struct io_pgtable_cfg {
        #define IO_PGTABLE_QUIRK_NO_PERMS       BIT(1)
        #define IO_PGTABLE_QUIRK_TLBI_ON_MAP    BIT(2)
        #define IO_PGTABLE_QUIRK_ARM_MTK_4GB    BIT(3)
-       #define IO_PGTABLE_QUIRK_NO_DMA         BIT(4)
-       #define IO_PGTABLE_QUIRK_NON_STRICT     BIT(5)
+       #define IO_PGTABLE_QUIRK_NON_STRICT     BIT(4)
        unsigned long                   quirks;
        unsigned long                   pgsize_bitmap;
        unsigned int                    ias;
        unsigned int                    oas;
+       bool                            coherent_walk;
        const struct iommu_gather_ops   *tlb;
        struct device                   *iommu_dev;
 
index e552c3b63f6f014808e13ebc052517f2cb2eb5bd..86b4e0a75a97a8ed9ce21effdb08c6ab098fe0ca 100644 (file)
  * if the IOMMU page table format is equivalent.
  */
 #define IOMMU_PRIV     (1 << 5)
+/*
+ * Non-coherent masters on a few Qualcomm SoCs can use this page protection
+ * flag to set the correct cacheability attributes for using an outer level
+ * of cache - the last-level cache, aka system cache.
+ */
+#define IOMMU_QCOM_SYS_CACHE   (1 << 6)
 
 struct iommu_ops;
 struct iommu_group;