// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */

#define pr_fmt(fmt) "arm-smmu: " fmt
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu-regs.h"
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1

#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
	QCOM_SMMUV2,
};

struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};
struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
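/*
 * Illustrative sketch (not from the original source): for_each_cfg_sme()
 * walks a device's stream IDs alongside the stream map entries allocated
 * for them, yielding INVALID_SMENDX for any ID without an entry. A caller
 * typically looks like:
 *
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;	// no SMR/S2CR claimed for this ID yet
 *		arm_smmu_write_sme(smmu, idx);
 *	}
 */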
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	unsigned int			numpage;
	unsigned int			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;
	struct clk_bulk_data		*clks;
	int				num_clks;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	enum arm_smmu_cbar_type		cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};
static void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
{
	return smmu->base + (n << smmu->pgshift);
}

static u32 arm_smmu_readl(struct arm_smmu_device *smmu, int page, int offset)
{
	return readl_relaxed(arm_smmu_page(smmu, page) + offset);
}

static void arm_smmu_writel(struct arm_smmu_device *smmu, int page, int offset,
			    u32 val)
{
	writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
}

static u64 arm_smmu_readq(struct arm_smmu_device *smmu, int page, int offset)
{
	return readq_relaxed(arm_smmu_page(smmu, page) + offset);
}

static void arm_smmu_writeq(struct arm_smmu_device *smmu, int page, int offset,
			    u64 val)
{
	writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
}

#define ARM_SMMU_GR1		1
#define ARM_SMMU_CB(s, n)	((s)->numpage + (n))

#define arm_smmu_gr1_read(s, o)		\
	arm_smmu_readl((s), ARM_SMMU_GR1, (o))
#define arm_smmu_gr1_write(s, o, v)	\
	arm_smmu_writel((s), ARM_SMMU_GR1, (o), (v))

#define arm_smmu_cb_read(s, n, o)	\
	arm_smmu_readl((s), ARM_SMMU_CB((s), (n)), (o))
#define arm_smmu_cb_write(s, n, o, v)	\
	arm_smmu_writel((s), ARM_SMMU_CB((s), (n)), (o), (v))
#define arm_smmu_cb_readq(s, n, o)	\
	arm_smmu_readq((s), ARM_SMMU_CB((s), (n)), (o))
#define arm_smmu_cb_writeq(s, n, o, v)	\
	arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
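/*
 * Worked example (illustrative, not from the original source): the register
 * space is numpage pages of global space followed by one page per context
 * bank, so with pgshift == 12 (4K pages) a write to context bank 3's SCTLR:
 *
 *	arm_smmu_cb_write(smmu, 3, ARM_SMMU_CB_SCTLR, val);
 *
 * resolves to an MMIO store at:
 *
 *	smmu->base + ((smmu->numpage + 3) << 12) + ARM_SMMU_CB_SCTLR
 */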
struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};

static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}
static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
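/*
 * Illustrative sketch (not from the original source): context banks are
 * handed out from context_map with these helpers. For a stage-1 domain on
 * an SMMU with 2 stage-2-only banks out of 8, the allocation below in
 * arm_smmu_init_domain_context() amounts to roughly:
 *
 *	int idx = __arm_smmu_alloc_bitmap(smmu->context_map, 2, 8);
 *	if (idx < 0)
 *		return idx;	// -ENOSPC: all suitable banks in use
 *	...
 *	__arm_smmu_free_bitmap(smmu->context_map, idx);
 */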
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				int sync, int status)
{
	unsigned int spin_cnt, delay;
	u32 reg;

	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			reg = arm_smmu_readl(smmu, page, status);
			if (!(reg & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, 0, ARM_SMMU_GR0_sTLBGSYNC,
			    ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	/*
	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
	 * current CPU are visible beforehand.
	 */
	wmb();
	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
				      size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int reg, idx = cfg->cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
		iova = (iova >> 12) << 12;
		iova |= cfg->asid;
		do {
			arm_smmu_cb_write(smmu, idx, reg, iova);
			iova += granule;
		} while (size -= granule);
	} else {
		iova >>= 12;
		iova |= (u64)cfg->asid << 48;
		do {
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
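/*
 * Worked example (illustrative, not from the original source): in the
 * AArch64 format above, the TLBIVA payload carries the page address
 * (iova >> 12) in the low bits and the ASID in bits [63:48]. Invalidating
 * VA 0x7f001000 in ASID 5 therefore writes the 64-bit value:
 *
 *	(0x7f001000 >> 12) | ((u64)5 << 48) == 0x000500000007f001
 */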
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
				      size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int reg, idx = smmu_domain->cfg.cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
	iova >>= 12;
	do {
		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
		else
			arm_smmu_cb_write(smmu, idx, reg, iova);
		iova += granule >> 12;
	} while (size -= granule);
}

/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_s1,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_s2,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
		return;
	}

	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_VA64;
		else
			reg = 0;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);

		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
	}

	/* CBAR */
	reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
	if (smmu->version < ARM_SMMU_V2)
		reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
			FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
	}
	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);

	/*
	 * TCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
	} else {
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		if (stage1)
			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
					   cb->ttbr[1]);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 * S1               N                S1
	 * S1               S1+S2            S1
	 * S1               S2               S2
	 * S1               S1               S1
	 * N                N                N
	 * N                S1+S2            N
	 * N                S2               S2
	 * N                S1               S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
		  FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
		  FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = FIELD_GET(SMR_ID, smr);

	smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
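/*
 * Worked example (illustrative, not from the original source): an existing
 * entry {id = 0x400, mask = 0xff} matches stream IDs 0x400-0x4ff. A new
 * request {id = 0x410, mask = 0x0f} is entirely contained within it, since
 * (0x0f & 0xff) == 0x0f and ((0x400 ^ 0x410) & ~0xff) == 0, so the existing
 * index is reused. A request {id = 0x500, mask = 0x1ff} would match
 * 0x400-0x5ff, overlapping the existing entry without being contained by
 * it, and is rejected with -EINVAL.
 */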
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *reg;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret, idx = cfg->cbndx;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	va = iova & ~0xfffUL;
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
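/*
 * Worked example (illustrative, not from the original source): a successful
 * ATS1PR translation of iova 0x80321abc whose PAR reports physical page
 * 0x89654000 composes the result as:
 *
 *	(0x89654000 & GENMASK_ULL(39, 12)) | (0x80321abc & 0xfff)
 *		== 0x89654abc
 */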
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}

static int arm_smmu_match_node(struct device *dev, const void *data)
{
	return dev->fwnode == data;
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
						fwnode, arm_smmu_match_node);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);

	arm_smmu_rpm_put(smmu);

	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}

static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= FIELD_PREP(SMR_ID, args->args[0]);

	if (args->args_count > 1)
		fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= FIELD_PREP(SMR_MASK, mask);

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
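/*
 * Illustrative DT fragment (hypothetical, not from the original source),
 * showing how the args above are produced by a master node using the
 * generic "iommus" binding with #iommu-cells = <1>:
 *
 *	smmu: iommu@40000000 {
 *		compatible = "arm,mmu-500";
 *		#iommu-cells = <1>;
 *		...
 *	};
 *
 *	gpu@50000000 {
 *		...
 *		iommus = <&smmu 0x220>;	// args[0] == stream ID 0x220
 *	};
 */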
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = FIELD_GET(ID7_MAJOR, reg);
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		arm_smmu_write_context_bank(smmu, i);
		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned int size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << FIELD_GET(ID0_NUMSIDB, id);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = FIELD_GET(ID0_NUMSMRG, id);
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
	if (smmu->numpage != 2 * size << smmu->pgshift)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
			2 * size << smmu->pgshift, smmu->numpage);
	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
	smmu->numpage = size;

	smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
	smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
	}
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = FIELD_GET(ID2_UBS, id);
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);


	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
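/*
 * Worked example (illustrative, not from the original source): an SMMU
 * reporting the AArch64 4K and 64K granules would accumulate
 *
 *	pgsize_bitmap == SZ_4K | SZ_2M | SZ_1G | SZ_64K | SZ_512M
 *		      == 0x60211000
 *
 * which io-pgtable then narrows to the bitmap of whichever format is
 * actually chosen for each domain.
 */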
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);
	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	/*
	 * The resource size should effectively match the value of SMMU_TOP;
	 * stash that temporarily until we know PAGESIZE to validate it with.
	 */
	smmu->numpage = resource_size(res);

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
				"found only %d context irq(s) but %d required\n",
				smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}
/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.shutdown = arm_smmu_device_shutdown,
};
builtin_platform_driver(arm_smmu_driver);