1 // SPDX-License-Identifier: GPL-2.0-only
3 * IOMMU API for ARM architected SMMU implementations.
5 * Copyright (C) 2013 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver currently supports:
10 * - SMMUv1 and v2 implementations
11 * - Stream-matching and stream-indexing
12 * - v7/v8 long-descriptor format
13 * - Non-secure access to the SMMU
14 * - Context fault reporting
15 * - Extended Stream ID (16 bit)
18 #define pr_fmt(fmt) "arm-smmu: " fmt
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/atomic.h>
23 #include <linux/bitfield.h>
24 #include <linux/delay.h>
25 #include <linux/dma-iommu.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/err.h>
28 #include <linux/interrupt.h>
30 #include <linux/io-64-nonatomic-hi-lo.h>
31 #include <linux/io-pgtable.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/init.h>
35 #include <linux/moduleparam.h>
37 #include <linux/of_address.h>
38 #include <linux/of_device.h>
39 #include <linux/of_iommu.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
42 #include <linux/pm_runtime.h>
43 #include <linux/slab.h>
44 #include <linux/spinlock.h>
46 #include <linux/amba/bus.h>
47 #include <linux/fsl/mc.h>
49 #include "arm-smmu-regs.h"
52 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
53 * global register space are still, in fact, using a hypervisor to mediate it
54 * by trapping and emulating register accesses. Sadly, some deployed versions
55 * of said trapping code have bugs wherein they go horribly wrong for stores
56 * using r31 (i.e. XZR/WZR) as the source register.
58 #define QCOM_DUMMY_VAL -1
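/*
 * Using a non-zero dummy value for such writes guarantees the compiler
 * sources the store from a general-purpose register rather than XZR/WZR,
 * sidestepping the broken trapping code described above.
 */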
60 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
62 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
63 #define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
64 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
66 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
67 #define TLB_SPIN_COUNT 10
69 /* Maximum number of context banks per SMMU */
70 #define ARM_SMMU_MAX_CBS 128
72 /* SMMU global address space */
73 #define ARM_SMMU_GR0(smmu) ((smmu)->base)
74 #define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
77 * SMMU global address space with conditional offset to access secure
78 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
81 #define ARM_SMMU_GR0_NS(smmu) \
83 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
87 * Some 64-bit registers only make sense to write atomically, but in such
88 * cases all the data relevant to AArch32 formats lies within the lower word,
89 * therefore this actually makes more sense than it might first appear.
91 #ifdef CONFIG_64BIT
92 #define smmu_write_atomic_lq writeq_relaxed
93 #else
94 #define smmu_write_atomic_lq writel_relaxed
95 #endif
97 /* Translation context bank */
98 #define ARM_SMMU_CB(smmu, n) ((smmu)->base + (((smmu)->numpage + (n)) << (smmu)->pgshift))
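/*
 * Context bank n's registers live in the second half of the SMMU address
 * space, i.e. at page (numpage + n); numpage is rewritten at probe time to
 * hold the NUMPAGE count rather than the raw resource size for this reason.
 */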
100 #define MSI_IOVA_BASE 0x8000000
101 #define MSI_IOVA_LENGTH 0x100000
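/*
 * Fixed IOVA window reserved for mapping MSI doorbells; it is reported to
 * the IOMMU core as a software MSI region by arm_smmu_get_resv_regions().
 */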
103 static int force_stage;
105 * not really modular, but the easiest way to keep compat with existing
106 * bootargs behaviour is to continue using module_param() here.
108 module_param(force_stage, int, S_IRUGO);
109 MODULE_PARM_DESC(force_stage,
110 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
111 static bool disable_bypass =
112 IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
113 module_param(disable_bypass, bool, S_IRUGO);
114 MODULE_PARM_DESC(disable_bypass,
115 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
117 enum arm_smmu_arch_version {
123 enum arm_smmu_implementation {
130 struct arm_smmu_s2cr {
131 struct iommu_group *group;
133 enum arm_smmu_s2cr_type type;
134 enum arm_smmu_s2cr_privcfg privcfg;
138 #define s2cr_init_val (struct arm_smmu_s2cr){ \
139 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
142 struct arm_smmu_smr {
152 struct arm_smmu_cfg *cfg;
155 struct arm_smmu_master_cfg {
156 struct arm_smmu_device *smmu;
159 #define INVALID_SMENDX -1
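/*
 * Per-master data is stashed in the device's iommu_fwspec (iommu_priv);
 * these helpers retrieve the owning SMMU and walk the stream-map entry
 * index allocated for each of the master's stream IDs, with INVALID_SMENDX
 * denoting a slot that has not been claimed yet.
 */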
160 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
161 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
162 #define fwspec_smendx(fw, i) \
163 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
164 #define for_each_cfg_sme(fw, i, idx) \
165 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
167 struct arm_smmu_device {
171 unsigned int numpage;
172 unsigned int pgshift;
174 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
175 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
176 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
177 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
178 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
179 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
180 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
181 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
182 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
183 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
184 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
185 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
186 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
189 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
191 enum arm_smmu_arch_version version;
192 enum arm_smmu_implementation model;
194 u32 num_context_banks;
195 u32 num_s2_context_banks;
196 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
197 struct arm_smmu_cb *cbs;
200 u32 num_mapping_groups;
203 struct arm_smmu_smr *smrs;
204 struct arm_smmu_s2cr *s2crs;
205 struct mutex stream_map_mutex;
207 unsigned long va_size;
208 unsigned long ipa_size;
209 unsigned long pa_size;
210 unsigned long pgsize_bitmap;
213 u32 num_context_irqs;
215 struct clk_bulk_data *clks;
218 u32 cavium_id_base; /* Specific to Cavium */
220 spinlock_t global_sync_lock;
222 /* IOMMU core code handle */
223 struct iommu_device iommu;
226 enum arm_smmu_context_fmt {
227 ARM_SMMU_CTX_FMT_NONE,
228 ARM_SMMU_CTX_FMT_AARCH64,
229 ARM_SMMU_CTX_FMT_AARCH32_L,
230 ARM_SMMU_CTX_FMT_AARCH32_S,
233 struct arm_smmu_cfg {
240 enum arm_smmu_cbar_type cbar;
241 enum arm_smmu_context_fmt fmt;
243 #define INVALID_IRPTNDX 0xff
245 enum arm_smmu_domain_stage {
246 ARM_SMMU_DOMAIN_S1 = 0,
248 ARM_SMMU_DOMAIN_NESTED,
249 ARM_SMMU_DOMAIN_BYPASS,
252 struct arm_smmu_domain {
253 struct arm_smmu_device *smmu;
254 struct io_pgtable_ops *pgtbl_ops;
255 const struct iommu_gather_ops *tlb_ops;
256 struct arm_smmu_cfg cfg;
257 enum arm_smmu_domain_stage stage;
259 struct mutex init_mutex; /* Protects smmu pointer */
260 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
261 struct iommu_domain domain;
264 struct arm_smmu_option_prop {
269 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
271 static bool using_legacy_binding, using_generic_binding;
273 static struct arm_smmu_option_prop arm_smmu_options[] = {
274 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
278 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
280 if (pm_runtime_enabled(smmu->dev))
281 return pm_runtime_get_sync(smmu->dev);
286 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
288 if (pm_runtime_enabled(smmu->dev))
289 pm_runtime_put(smmu->dev);
292 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
294 return container_of(dom, struct arm_smmu_domain, domain);
297 static void parse_driver_options(struct arm_smmu_device *smmu)
302 if (of_property_read_bool(smmu->dev->of_node,
303 arm_smmu_options[i].prop)) {
304 smmu->options |= arm_smmu_options[i].opt;
305 dev_notice(smmu->dev, "option %s\n",
306 arm_smmu_options[i].prop);
308 } while (arm_smmu_options[++i].opt);
311 static struct device_node *dev_get_dev_node(struct device *dev)
313 if (dev_is_pci(dev)) {
314 struct pci_bus *bus = to_pci_dev(dev)->bus;
316 while (!pci_is_root_bus(bus))
318 return of_node_get(bus->bridge->parent->of_node);
321 return of_node_get(dev->of_node);
324 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
326 *((__be32 *)data) = cpu_to_be32(alias);
327 return 0; /* Continue walking */
330 static int __find_legacy_master_phandle(struct device *dev, void *data)
332 struct of_phandle_iterator *it = *(void **)data;
333 struct device_node *np = it->node;
336 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
337 "#stream-id-cells", 0)
338 if (it->node == np) {
339 *(void **)data = dev;
343 return err == -ENOENT ? 0 : err;
346 static struct platform_driver arm_smmu_driver;
347 static struct iommu_ops arm_smmu_ops;
349 static int arm_smmu_register_legacy_master(struct device *dev,
350 struct arm_smmu_device **smmu)
352 struct device *smmu_dev;
353 struct device_node *np;
354 struct of_phandle_iterator it;
360 np = dev_get_dev_node(dev);
361 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
367 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
368 __find_legacy_master_phandle);
376 if (dev_is_pci(dev)) {
377 /* "mmu-masters" assumes Stream ID == Requester ID */
378 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
384 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
389 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
393 *smmu = dev_get_drvdata(smmu_dev);
394 of_phandle_iterator_args(&it, sids, it.cur_count);
395 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
400 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
405 idx = find_next_zero_bit(map, end, start);
408 } while (test_and_set_bit(idx, map));
413 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
418 /* Wait for any pending TLB invalidations to complete */
419 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
420 void __iomem *sync, void __iomem *status)
422 unsigned int spin_cnt, delay;
424 writel_relaxed(QCOM_DUMMY_VAL, sync);
425 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
426 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
427 if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
433 dev_err_ratelimited(smmu->dev,
434 "TLB sync timed out -- SMMU may be deadlocked\n");
437 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
439 void __iomem *base = ARM_SMMU_GR0(smmu);
442 spin_lock_irqsave(&smmu->global_sync_lock, flags);
443 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
444 base + ARM_SMMU_GR0_sTLBGSTATUS);
445 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
448 static void arm_smmu_tlb_sync_context(void *cookie)
450 struct arm_smmu_domain *smmu_domain = cookie;
451 struct arm_smmu_device *smmu = smmu_domain->smmu;
452 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
455 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
456 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
457 base + ARM_SMMU_CB_TLBSTATUS);
458 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
461 static void arm_smmu_tlb_sync_vmid(void *cookie)
463 struct arm_smmu_domain *smmu_domain = cookie;
465 arm_smmu_tlb_sync_global(smmu_domain->smmu);
468 static void arm_smmu_tlb_inv_context_s1(void *cookie)
470 struct arm_smmu_domain *smmu_domain = cookie;
471 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
472 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
475 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
476 * cleared by the current CPU are visible to the SMMU before the TLBI.
478 writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
479 arm_smmu_tlb_sync_context(cookie);
482 static void arm_smmu_tlb_inv_context_s2(void *cookie)
484 struct arm_smmu_domain *smmu_domain = cookie;
485 struct arm_smmu_device *smmu = smmu_domain->smmu;
486 void __iomem *base = ARM_SMMU_GR0(smmu);
488 /* NOTE: see above */
489 writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
490 arm_smmu_tlb_sync_global(smmu);
493 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
494 size_t granule, bool leaf, void *cookie)
496 struct arm_smmu_domain *smmu_domain = cookie;
497 struct arm_smmu_device *smmu = smmu_domain->smmu;
498 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
499 void __iomem *reg = ARM_SMMU_CB(smmu, cfg->cbndx);
501 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
504 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
506 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
507 iova = (iova >> 12) << 12;
510 writel_relaxed(iova, reg);
512 } while (size -= granule);
515 iova |= (u64)cfg->asid << 48;
517 writeq_relaxed(iova, reg);
518 iova += granule >> 12;
519 } while (size -= granule);
523 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
524 size_t granule, bool leaf, void *cookie)
526 struct arm_smmu_domain *smmu_domain = cookie;
527 struct arm_smmu_device *smmu = smmu_domain->smmu;
528 void __iomem *reg = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
530 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
533 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
536 smmu_write_atomic_lq(iova, reg);
537 iova += granule >> 12;
538 } while (size -= granule);
542 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
543 * almost negligible, but the benefit of getting the first one in as far ahead
544 * of the sync as possible is significant, hence we don't just make this a
545 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
547 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
548 size_t granule, bool leaf, void *cookie)
550 struct arm_smmu_domain *smmu_domain = cookie;
551 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
553 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
556 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
559 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
560 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
561 .tlb_add_flush = arm_smmu_tlb_inv_range_s1,
562 .tlb_sync = arm_smmu_tlb_sync_context,
565 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
566 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
567 .tlb_add_flush = arm_smmu_tlb_inv_range_s2,
568 .tlb_sync = arm_smmu_tlb_sync_context,
571 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
572 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
573 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
574 .tlb_sync = arm_smmu_tlb_sync_vmid,
577 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
579 u32 fsr, fsynr, cbfrsynra;
581 struct iommu_domain *domain = dev;
582 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
583 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
584 struct arm_smmu_device *smmu = smmu_domain->smmu;
585 void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
586 void __iomem *cb_base;
588 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
589 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
591 if (!(fsr & FSR_FAULT))
594 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
595 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
596 cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
598 dev_err_ratelimited(smmu->dev,
599 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
600 fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
602 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
606 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
608 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
609 struct arm_smmu_device *smmu = dev;
610 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
612 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
613 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
614 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
615 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
620 dev_err_ratelimited(smmu->dev,
621 "Unexpected global fault, this could be serious\n");
622 dev_err_ratelimited(smmu->dev,
623 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
624 gfsr, gfsynr0, gfsynr1, gfsynr2);
626 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
630 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
631 struct io_pgtable_cfg *pgtbl_cfg)
633 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
634 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
635 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
641 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
642 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
644 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
645 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
646 cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
647 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
648 cb->tcr[1] |= TCR2_AS;
651 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
656 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
657 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
658 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
660 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
661 cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
662 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
663 cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
666 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
669 /* MAIRs (stage-1 only) */
671 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
672 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
673 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
675 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
676 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
681 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
685 struct arm_smmu_cb *cb = &smmu->cbs[idx];
686 struct arm_smmu_cfg *cfg = cb->cfg;
687 void __iomem *cb_base, *gr1_base;
689 cb_base = ARM_SMMU_CB(smmu, idx);
691 /* Unassigned context banks only need disabling */
693 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
697 gr1_base = ARM_SMMU_GR1(smmu);
698 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
701 if (smmu->version > ARM_SMMU_V1) {
702 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
706 /* 16-bit VMIDs live in CBA2R */
707 if (smmu->features & ARM_SMMU_FEAT_VMID16)
708 reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
710 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
714 reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
715 if (smmu->version < ARM_SMMU_V2)
716 reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
719 * Use the weakest shareability/memory types, so they are
720 * overridden by the ttbcr/pte.
723 reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
724 FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
725 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
726 /* 8-bit VMIDs live in CBAR */
727 reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
729 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
733 * We must write this before the TTBRs, since it determines the
734 * access behaviour of some fields (in particular, ASID[15:8]).
736 if (stage1 && smmu->version > ARM_SMMU_V1)
737 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TCR2);
738 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TCR);
741 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
742 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
743 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
744 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
746 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
748 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
751 /* MAIRs (stage-1 only) */
753 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
754 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
758 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
760 reg |= SCTLR_S1_ASIDPNE;
761 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
764 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
767 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
768 struct arm_smmu_device *smmu)
770 int irq, start, ret = 0;
771 unsigned long ias, oas;
772 struct io_pgtable_ops *pgtbl_ops;
773 struct io_pgtable_cfg pgtbl_cfg;
774 enum io_pgtable_fmt fmt;
775 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
776 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
778 mutex_lock(&smmu_domain->init_mutex);
779 if (smmu_domain->smmu)
782 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
783 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
784 smmu_domain->smmu = smmu;
789 * Mapping the requested stage onto what we support is surprisingly
790 * complicated, mainly because the spec allows S1+S2 SMMUs without
791 * support for nested translation. That means we end up with the
794 * Requested Supported Actual
804 * Note that you can't actually request stage-2 mappings.
806 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
807 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
808 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
809 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
812 * Choosing a suitable context format is even more fiddly. Until we
813 * grow some way for the caller to express a preference, and/or move
814 * the decision into the io-pgtable code where it arguably belongs,
815 * just aim for the closest thing to the rest of the system, and hope
816 * that the hardware isn't esoteric enough that we can't assume AArch64
817 * support to be a superset of AArch32 support...
819 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
820 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
821 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
822 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
823 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
824 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
825 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
826 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
827 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
828 ARM_SMMU_FEAT_FMT_AARCH64_16K |
829 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
830 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
832 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
837 switch (smmu_domain->stage) {
838 case ARM_SMMU_DOMAIN_S1:
839 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
840 start = smmu->num_s2_context_banks;
842 oas = smmu->ipa_size;
843 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
844 fmt = ARM_64_LPAE_S1;
845 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
846 fmt = ARM_32_LPAE_S1;
847 ias = min(ias, 32UL);
848 oas = min(oas, 40UL);
851 ias = min(ias, 32UL);
852 oas = min(oas, 32UL);
854 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
856 case ARM_SMMU_DOMAIN_NESTED:
858 * We will likely want to change this if/when KVM gets
861 case ARM_SMMU_DOMAIN_S2:
862 cfg->cbar = CBAR_TYPE_S2_TRANS;
864 ias = smmu->ipa_size;
866 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
867 fmt = ARM_64_LPAE_S2;
869 fmt = ARM_32_LPAE_S2;
870 ias = min(ias, 40UL);
871 oas = min(oas, 40UL);
873 if (smmu->version == ARM_SMMU_V2)
874 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
876 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
882 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
883 smmu->num_context_banks);
888 if (smmu->version < ARM_SMMU_V2) {
889 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
890 cfg->irptndx %= smmu->num_context_irqs;
892 cfg->irptndx = cfg->cbndx;
895 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
896 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
898 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
900 pgtbl_cfg = (struct io_pgtable_cfg) {
901 .pgsize_bitmap = smmu->pgsize_bitmap,
904 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
905 .tlb = smmu_domain->tlb_ops,
906 .iommu_dev = smmu->dev,
909 if (smmu_domain->non_strict)
910 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
912 smmu_domain->smmu = smmu;
913 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
919 /* Update the domain's page sizes to reflect the page table format */
920 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
921 domain->geometry.aperture_end = (1UL << ias) - 1;
922 domain->geometry.force_aperture = true;
924 /* Initialise the context bank with our page table cfg */
925 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
926 arm_smmu_write_context_bank(smmu, cfg->cbndx);
929 * Request context fault interrupt. Do this last to avoid the
930 * handler seeing a half-initialised domain state.
932 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
933 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
934 IRQF_SHARED, "arm-smmu-context-fault", domain);
936 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
938 cfg->irptndx = INVALID_IRPTNDX;
941 mutex_unlock(&smmu_domain->init_mutex);
943 /* Publish page table ops for map/unmap */
944 smmu_domain->pgtbl_ops = pgtbl_ops;
948 smmu_domain->smmu = NULL;
950 mutex_unlock(&smmu_domain->init_mutex);
954 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
956 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
957 struct arm_smmu_device *smmu = smmu_domain->smmu;
958 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
961 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
964 ret = arm_smmu_rpm_get(smmu);
969 * Disable the context bank and free the page tables before freeing
972 smmu->cbs[cfg->cbndx].cfg = NULL;
973 arm_smmu_write_context_bank(smmu, cfg->cbndx);
975 if (cfg->irptndx != INVALID_IRPTNDX) {
976 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
977 devm_free_irq(smmu->dev, irq, domain);
980 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
981 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
983 arm_smmu_rpm_put(smmu);
986 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
988 struct arm_smmu_domain *smmu_domain;
990 if (type != IOMMU_DOMAIN_UNMANAGED &&
991 type != IOMMU_DOMAIN_DMA &&
992 type != IOMMU_DOMAIN_IDENTITY)
995 * Allocate the domain and initialise some of its data structures.
996 * We can't really do anything meaningful until we've added a
999 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1003 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
1004 iommu_get_dma_cookie(&smmu_domain->domain))) {
1009 mutex_init(&smmu_domain->init_mutex);
1010 spin_lock_init(&smmu_domain->cb_lock);
1012 return &smmu_domain->domain;
1015 static void arm_smmu_domain_free(struct iommu_domain *domain)
1017 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1020 * Free the domain resources. We assume that all devices have
1021 * already been detached.
1023 iommu_put_dma_cookie(domain);
1024 arm_smmu_destroy_domain_context(domain);
1028 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1030 struct arm_smmu_smr *smr = smmu->smrs + idx;
1031 u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
1033 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1035 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1038 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1040 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1041 u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
1042 FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
1043 FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
1045 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1046 smmu->smrs[idx].valid)
1047 reg |= S2CR_EXIDVALID;
1048 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1051 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1053 arm_smmu_write_s2cr(smmu, idx);
1055 arm_smmu_write_smr(smmu, idx);
1059 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1060 * should be called after sCR0 is written.
1062 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1064 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1071 * SMR.ID bits may not be preserved if the corresponding MASK
1072 * bits are set, so check each one separately. We can reject
1073 * masters later if they try to claim IDs outside these masks.
1075 smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
1076 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1077 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1078 smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
1080 smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
1081 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1082 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1083 smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
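/*
 * Find a free or reusable stream-map entry for the given ID/mask pair.
 * For example, a new entry with id 0x10/mask 0x3 is wholly covered by an
 * existing id 0x10/mask 0x7 entry and can simply reuse its index, whereas
 * an entry that only partially overlaps an existing one could alias
 * unrelated stream IDs and must be rejected.
 */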
1086 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1088 struct arm_smmu_smr *smrs = smmu->smrs;
1089 int i, free_idx = -ENOSPC;
1091 /* Stream indexing is blissfully easy */
1095 /* Validating SMRs is... less so */
1096 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1097 if (!smrs[i].valid) {
1099 * Note the first free entry we come across, which
1100 * we'll claim in the end if nothing else matches.
1107 * If the new entry is _entirely_ matched by an existing entry,
1108 * then reuse that, with the guarantee that there also cannot
1109 * be any subsequent conflicting entries. In normal use we'd
1110 * expect simply identical entries for this case, but there's
1111 * no harm in accommodating the generalisation.
1113 if ((mask & smrs[i].mask) == mask &&
1114 !((id ^ smrs[i].id) & ~smrs[i].mask))
1117 * If the new entry has any other overlap with an existing one,
1118 * though, then there always exists at least one stream ID
1119 * which would cause a conflict, and we can't allow that risk.
1121 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1128 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1130 if (--smmu->s2crs[idx].count)
1133 smmu->s2crs[idx] = s2cr_init_val;
1135 smmu->smrs[idx].valid = false;
1140 static int arm_smmu_master_alloc_smes(struct device *dev)
1142 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1143 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1144 struct arm_smmu_device *smmu = cfg->smmu;
1145 struct arm_smmu_smr *smrs = smmu->smrs;
1146 struct iommu_group *group;
1149 mutex_lock(&smmu->stream_map_mutex);
1150 /* Figure out a viable stream map entry allocation */
1151 for_each_cfg_sme(fwspec, i, idx) {
1152 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1153 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1155 if (idx != INVALID_SMENDX) {
1160 ret = arm_smmu_find_sme(smmu, sid, mask);
1165 if (smrs && smmu->s2crs[idx].count == 0) {
1167 smrs[idx].mask = mask;
1168 smrs[idx].valid = true;
1170 smmu->s2crs[idx].count++;
1171 cfg->smendx[i] = (s16)idx;
1174 group = iommu_group_get_for_dev(dev);
1176 group = ERR_PTR(-ENOMEM);
1177 if (IS_ERR(group)) {
1178 ret = PTR_ERR(group);
1181 iommu_group_put(group);
1183 /* It worked! Now, poke the actual hardware */
1184 for_each_cfg_sme(fwspec, i, idx) {
1185 arm_smmu_write_sme(smmu, idx);
1186 smmu->s2crs[idx].group = group;
1189 mutex_unlock(&smmu->stream_map_mutex);
1194 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1195 cfg->smendx[i] = INVALID_SMENDX;
1197 mutex_unlock(&smmu->stream_map_mutex);
1201 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1203 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1204 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1207 mutex_lock(&smmu->stream_map_mutex);
1208 for_each_cfg_sme(fwspec, i, idx) {
1209 if (arm_smmu_free_sme(smmu, idx))
1210 arm_smmu_write_sme(smmu, idx);
1211 cfg->smendx[i] = INVALID_SMENDX;
1213 mutex_unlock(&smmu->stream_map_mutex);
1216 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1217 struct iommu_fwspec *fwspec)
1219 struct arm_smmu_device *smmu = smmu_domain->smmu;
1220 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1221 u8 cbndx = smmu_domain->cfg.cbndx;
1222 enum arm_smmu_s2cr_type type;
1225 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1226 type = S2CR_TYPE_BYPASS;
1228 type = S2CR_TYPE_TRANS;
1230 for_each_cfg_sme(fwspec, i, idx) {
1231 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1234 s2cr[idx].type = type;
1235 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1236 s2cr[idx].cbndx = cbndx;
1237 arm_smmu_write_s2cr(smmu, idx);
1242 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1245 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1246 struct arm_smmu_device *smmu;
1247 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1249 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1250 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1255 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1256 * domains between of_xlate() and add_device() - we have no way to cope
1257 * with that, so until ARM gets converted to rely on groups and default
1258 * domains, just say no (but more politely than by dereferencing NULL).
1259 * This should be at least a WARN_ON once that's sorted.
1261 if (!fwspec->iommu_priv)
1264 smmu = fwspec_smmu(fwspec);
1266 ret = arm_smmu_rpm_get(smmu);
1270 /* Ensure that the domain is finalised */
1271 ret = arm_smmu_init_domain_context(domain, smmu);
1276 * Sanity check the domain. We don't support domains across
1279 if (smmu_domain->smmu != smmu) {
1281 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1282 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1287 /* Looks ok, so add the device to the domain */
1288 ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1291 arm_smmu_rpm_put(smmu);
1295 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1296 phys_addr_t paddr, size_t size, int prot)
1298 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1299 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1305 arm_smmu_rpm_get(smmu);
1306 ret = ops->map(ops, iova, paddr, size, prot);
1307 arm_smmu_rpm_put(smmu);
1312 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1315 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1316 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1322 arm_smmu_rpm_get(smmu);
1323 ret = ops->unmap(ops, iova, size);
1324 arm_smmu_rpm_put(smmu);
1329 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1331 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1332 struct arm_smmu_device *smmu = smmu_domain->smmu;
1334 if (smmu_domain->tlb_ops) {
1335 arm_smmu_rpm_get(smmu);
1336 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1337 arm_smmu_rpm_put(smmu);
1341 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1343 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1344 struct arm_smmu_device *smmu = smmu_domain->smmu;
1346 if (smmu_domain->tlb_ops) {
1347 arm_smmu_rpm_get(smmu);
1348 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1349 arm_smmu_rpm_put(smmu);
1353 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1356 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1357 struct arm_smmu_device *smmu = smmu_domain->smmu;
1358 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1359 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1360 struct device *dev = smmu->dev;
1361 void __iomem *cb_base;
1364 unsigned long va, flags;
1367 ret = arm_smmu_rpm_get(smmu);
1371 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1373 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1374 /* ATS1 registers can only be written atomically */
1375 va = iova & ~0xfffUL;
1376 if (smmu->version == ARM_SMMU_V2)
1377 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1378 else /* Register is only 32-bit in v1 */
1379 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1381 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1382 !(tmp & ATSR_ACTIVE), 5, 50)) {
1383 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1385 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1387 return ops->iova_to_phys(ops, iova);
1390 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1391 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1392 if (phys & CB_PAR_F) {
1393 dev_err(dev, "translation fault!\n");
1394 dev_err(dev, "PAR = 0x%llx\n", phys);
1398 arm_smmu_rpm_put(smmu);
1400 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1403 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1406 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1407 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1409 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1415 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1416 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1417 return arm_smmu_iova_to_phys_hard(domain, iova);
1419 return ops->iova_to_phys(ops, iova);
1422 static bool arm_smmu_capable(enum iommu_cap cap)
1425 case IOMMU_CAP_CACHE_COHERENCY:
1427 * Return true here as the SMMU can always send out coherent
1431 case IOMMU_CAP_NOEXEC:
1438 static int arm_smmu_match_node(struct device *dev, const void *data)
1440 return dev->fwnode == data;
1444 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1446 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1447 fwnode, arm_smmu_match_node);
1449 return dev ? dev_get_drvdata(dev) : NULL;
1452 static int arm_smmu_add_device(struct device *dev)
1454 struct arm_smmu_device *smmu;
1455 struct arm_smmu_master_cfg *cfg;
1456 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1459 if (using_legacy_binding) {
1460 ret = arm_smmu_register_legacy_master(dev, &smmu);
1463 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1464 * will allocate/initialise a new one. Thus we need to update fwspec for
1467 fwspec = dev_iommu_fwspec_get(dev);
1470 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1471 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1477 for (i = 0; i < fwspec->num_ids; i++) {
1478 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1479 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1481 if (sid & ~smmu->streamid_mask) {
1482 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1483 sid, smmu->streamid_mask);
1486 if (mask & ~smmu->smr_mask_mask) {
1487 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1488 mask, smmu->smr_mask_mask);
1494 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1500 fwspec->iommu_priv = cfg;
1502 cfg->smendx[i] = INVALID_SMENDX;
1504 ret = arm_smmu_rpm_get(smmu);
1508 ret = arm_smmu_master_alloc_smes(dev);
1509 arm_smmu_rpm_put(smmu);
1514 iommu_device_link(&smmu->iommu, dev);
1516 device_link_add(dev, smmu->dev,
1517 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1524 iommu_fwspec_free(dev);
1528 static void arm_smmu_remove_device(struct device *dev)
1530 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1531 struct arm_smmu_master_cfg *cfg;
1532 struct arm_smmu_device *smmu;
1535 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1538 cfg = fwspec->iommu_priv;
1541 ret = arm_smmu_rpm_get(smmu);
1545 iommu_device_unlink(&smmu->iommu, dev);
1546 arm_smmu_master_free_smes(fwspec);
1548 arm_smmu_rpm_put(smmu);
1550 iommu_group_remove_device(dev);
1551 kfree(fwspec->iommu_priv);
1552 iommu_fwspec_free(dev);
1555 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1557 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1558 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1559 struct iommu_group *group = NULL;
1562 for_each_cfg_sme(fwspec, i, idx) {
1563 if (group && smmu->s2crs[idx].group &&
1564 group != smmu->s2crs[idx].group)
1565 return ERR_PTR(-EINVAL);
1567 group = smmu->s2crs[idx].group;
1571 return iommu_group_ref_get(group);
1573 if (dev_is_pci(dev))
1574 group = pci_device_group(dev);
1575 else if (dev_is_fsl_mc(dev))
1576 group = fsl_mc_device_group(dev);
1578 group = generic_device_group(dev);
1583 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1584 enum iommu_attr attr, void *data)
1586 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1588 switch (domain->type) {
1589 case IOMMU_DOMAIN_UNMANAGED:
1591 case DOMAIN_ATTR_NESTING:
1592 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1598 case IOMMU_DOMAIN_DMA:
1600 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1601 *(int *)data = smmu_domain->non_strict;
1612 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1613 enum iommu_attr attr, void *data)
1616 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1618 mutex_lock(&smmu_domain->init_mutex);
1620 switch (domain->type) {
1621 case IOMMU_DOMAIN_UNMANAGED:
1623 case DOMAIN_ATTR_NESTING:
1624 if (smmu_domain->smmu) {
1630 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1632 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1638 case IOMMU_DOMAIN_DMA:
1640 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1641 smmu_domain->non_strict = *(int *)data;
1651 mutex_unlock(&smmu_domain->init_mutex);
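/*
 * Translate a generic "iommus" specifier into an SMR ID (and, where the
 * binding provides one, an SMR mask) for this master. For example, a DT
 * entry such as iommus = <&smmu 0x400 0x3f> would (assuming a two-cell
 * specifier) yield stream ID 0x400 with mask 0x3f; the exact cell layout
 * depends on the binding in use.
 */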
1655 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1659 if (args->args_count > 0)
1660 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1662 if (args->args_count > 1)
1663 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1664 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1665 fwid |= FIELD_PREP(SMR_MASK, mask);
1667 return iommu_fwspec_add_ids(dev, &fwid, 1);
1670 static void arm_smmu_get_resv_regions(struct device *dev,
1671 struct list_head *head)
1673 struct iommu_resv_region *region;
1674 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1676 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1677 prot, IOMMU_RESV_SW_MSI);
1681 list_add_tail(&region->list, head);
1683 iommu_dma_get_resv_regions(dev, head);
1686 static void arm_smmu_put_resv_regions(struct device *dev,
1687 struct list_head *head)
1689 struct iommu_resv_region *entry, *next;
1691 list_for_each_entry_safe(entry, next, head, list)
1695 static struct iommu_ops arm_smmu_ops = {
1696 .capable = arm_smmu_capable,
1697 .domain_alloc = arm_smmu_domain_alloc,
1698 .domain_free = arm_smmu_domain_free,
1699 .attach_dev = arm_smmu_attach_dev,
1700 .map = arm_smmu_map,
1701 .unmap = arm_smmu_unmap,
1702 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1703 .iotlb_sync = arm_smmu_iotlb_sync,
1704 .iova_to_phys = arm_smmu_iova_to_phys,
1705 .add_device = arm_smmu_add_device,
1706 .remove_device = arm_smmu_remove_device,
1707 .device_group = arm_smmu_device_group,
1708 .domain_get_attr = arm_smmu_domain_get_attr,
1709 .domain_set_attr = arm_smmu_domain_set_attr,
1710 .of_xlate = arm_smmu_of_xlate,
1711 .get_resv_regions = arm_smmu_get_resv_regions,
1712 .put_resv_regions = arm_smmu_put_resv_regions,
1713 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1716 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1718 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1722 /* clear global FSR */
1723 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1724 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1727 * Reset stream mapping groups: Initial values mark all SMRn as
1728 * invalid and all S2CRn as bypass unless overridden.
1730 for (i = 0; i < smmu->num_mapping_groups; ++i)
1731 arm_smmu_write_sme(smmu, i);
1733 if (smmu->model == ARM_MMU500) {
1735 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1736 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1737 * bit is only present in MMU-500r2 onwards.
1739 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1740 major = FIELD_GET(ID7_MAJOR, reg);
1741 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1743 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1745 * Allow unmatched Stream IDs to allocate bypass
1746 * TLB entries for reduced latency.
1748 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1749 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1752 /* Make sure all context banks are disabled and clear CB_FSR */
1753 for (i = 0; i < smmu->num_context_banks; ++i) {
1754 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1756 arm_smmu_write_context_bank(smmu, i);
1757 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1759 * Disable MMU-500's not-particularly-beneficial next-page
1760 * prefetcher for the sake of errata #841119 and #826419.
1762 if (smmu->model == ARM_MMU500) {
1763 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1764 reg &= ~ARM_MMU500_ACTLR_CPRE;
1765 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1769 /* Invalidate the TLB, just in case */
1770 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1771 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1773 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1775 /* Enable fault reporting */
1776 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1778 /* Disable TLB broadcasting. */
1779 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1781 /* Enable client access, handling unmatched streams as appropriate */
1782 reg &= ~sCR0_CLIENTPD;
1786 reg &= ~sCR0_USFCFG;
1788 /* Disable forced broadcasting */
1791 /* Don't upgrade barriers */
1794 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1795 reg |= sCR0_VMID16EN;
1797 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1798 reg |= sCR0_EXIDENABLE;
1800 /* Push the button */
1801 arm_smmu_tlb_sync_global(smmu);
1802 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1805 static int arm_smmu_id_size_to_bits(int size)
1824 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1827 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1829 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1832 dev_notice(smmu->dev, "probing hardware configuration...\n");
1833 dev_notice(smmu->dev, "SMMUv%d with:\n",
1834 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1837 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1839 /* Restrict available stages based on module parameter */
1840 if (force_stage == 1)
1841 id &= ~(ID0_S2TS | ID0_NTS);
1842 else if (force_stage == 2)
1843 id &= ~(ID0_S1TS | ID0_NTS);
1845 if (id & ID0_S1TS) {
1846 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1847 dev_notice(smmu->dev, "\tstage 1 translation\n");
1850 if (id & ID0_S2TS) {
1851 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1852 dev_notice(smmu->dev, "\tstage 2 translation\n");
1856 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1857 dev_notice(smmu->dev, "\tnested translation\n");
1860 if (!(smmu->features &
1861 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1862 dev_err(smmu->dev, "\tno translation support!\n");
1866 if ((id & ID0_S1TS) &&
1867 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1868 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1869 dev_notice(smmu->dev, "\taddress translation ops\n");
1873 * In order for DMA API calls to work properly, we must defer to what
1874 * the FW says about coherency, regardless of what the hardware claims.
1875 * Fortunately, this also opens up a workaround for systems where the
1876 * ID register value has ended up configured incorrectly.
1878 cttw_reg = !!(id & ID0_CTTW);
1879 if (cttw_fw || cttw_reg)
1880 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1881 cttw_fw ? "" : "non-");
1882 if (cttw_fw != cttw_reg)
1883 dev_notice(smmu->dev,
1884 "\t(IDR0.CTTW overridden by FW configuration)\n");
1886 /* Max. number of entries we have for stream matching/indexing */
1887 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1888 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1891 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1893 smmu->streamid_mask = size - 1;
1895 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1896 size = FIELD_GET(ID0_NUMSMRG, id);
1899 "stream-matching supported, but no SMRs present!\n");
1903 /* Zero-initialised to mark as invalid */
1904 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1909 dev_notice(smmu->dev,
1910 "\tstream matching with %u register groups", size);
1912 /* s2cr->type == 0 means translation, so initialise explicitly */
1913 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1917 for (i = 0; i < size; i++)
1918 smmu->s2crs[i] = s2cr_init_val;
1920 smmu->num_mapping_groups = size;
1921 mutex_init(&smmu->stream_map_mutex);
1922 spin_lock_init(&smmu->global_sync_lock);
1924 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1925 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1926 if (!(id & ID0_PTFS_NO_AARCH32S))
1927 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1931 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1932 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1934 /* Check for size mismatch of SMMU address space from mapped region */
1935 size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1936 if (smmu->numpage != 2 * size << smmu->pgshift)
1938 "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1939 2 * size << smmu->pgshift, smmu->numpage);
1940 /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1941 smmu->numpage = size;
1943 smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1944 smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1945 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1946 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1949 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1950 smmu->num_context_banks, smmu->num_s2_context_banks);
1952 * Cavium CN88xx erratum #27704.
1953 * Ensure ASID and VMID allocation is unique across all SMMUs in
1956 if (smmu->model == CAVIUM_SMMUV2) {
1957 smmu->cavium_id_base =
1958 atomic_add_return(smmu->num_context_banks,
1959 &cavium_smmu_context_count);
1960 smmu->cavium_id_base -= smmu->num_context_banks;
1961 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1963 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1964 sizeof(*smmu->cbs), GFP_KERNEL);
1969 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1970 size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
1971 smmu->ipa_size = size;
1973 /* The output mask is also applied for bypass */
1974 size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
1975 smmu->pa_size = size;
1977 if (id & ID2_VMID16)
1978 smmu->features |= ARM_SMMU_FEAT_VMID16;
1981 * What the page table walker can address actually depends on which
1982 * descriptor format is in use, but since a) we don't know that yet,
1983 * and b) it can vary per context bank, this will have to do...
1985 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1987 "failed to set DMA mask for table walker\n");
1989 if (smmu->version < ARM_SMMU_V2) {
1990 smmu->va_size = smmu->ipa_size;
1991 if (smmu->version == ARM_SMMU_V1_64K)
1992 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1994 size = FIELD_GET(ID2_UBS, id);
1995 smmu->va_size = arm_smmu_id_size_to_bits(size);
1996 if (id & ID2_PTFS_4K)
1997 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1998 if (id & ID2_PTFS_16K)
1999 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
2000 if (id & ID2_PTFS_64K)
2001 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
2004 /* Now we've corralled the various formats, what'll it do? */
2005 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
2006 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
2007 if (smmu->features &
2008 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
2009 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2010 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
2011 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2012 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
2013 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2015 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2016 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2018 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2019 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
2020 smmu->pgsize_bitmap);
2023 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
2024 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
2025 smmu->va_size, smmu->ipa_size);
2027 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
2028 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
2029 smmu->ipa_size, smmu->pa_size);
2034 struct arm_smmu_match_data {
2035 enum arm_smmu_arch_version version;
2036 enum arm_smmu_implementation model;
2039 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
2040 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
2042 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
2043 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
2044 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
2045 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
2046 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
2047 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
2049 static const struct of_device_id arm_smmu_of_match[] = {
2050 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
2051 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
2052 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
2053 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
2054 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
2055 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
2056 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
2061 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
2066 case ACPI_IORT_SMMU_V1:
2067 case ACPI_IORT_SMMU_CORELINK_MMU400:
2068 smmu->version = ARM_SMMU_V1;
2069 smmu->model = GENERIC_SMMU;
2071 case ACPI_IORT_SMMU_CORELINK_MMU401:
2072 smmu->version = ARM_SMMU_V1_64K;
2073 smmu->model = GENERIC_SMMU;
2075 case ACPI_IORT_SMMU_V2:
2076 smmu->version = ARM_SMMU_V2;
2077 smmu->model = GENERIC_SMMU;
2079 case ACPI_IORT_SMMU_CORELINK_MMU500:
2080 smmu->version = ARM_SMMU_V2;
2081 smmu->model = ARM_MMU500;
2083 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
2084 smmu->version = ARM_SMMU_V2;
2085 smmu->model = CAVIUM_SMMUV2;
2094 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2095 struct arm_smmu_device *smmu)
2097 struct device *dev = smmu->dev;
2098 struct acpi_iort_node *node =
2099 *(struct acpi_iort_node **)dev_get_platdata(dev);
2100 struct acpi_iort_smmu *iort_smmu;
2103 /* Retrieve SMMU1/2 specific data */
2104 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2106 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2110 /* Ignore the configuration access interrupt */
2111 smmu->num_global_irqs = 1;
2113 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2114 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2119 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2120 struct arm_smmu_device *smmu)
2126 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2127 struct arm_smmu_device *smmu)
2129 const struct arm_smmu_match_data *data;
2130 struct device *dev = &pdev->dev;
2131 bool legacy_binding;
2133 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2134 &smmu->num_global_irqs)) {
2135 dev_err(dev, "missing #global-interrupts property\n");
2139 data = of_device_get_match_data(dev);
2140 smmu->version = data->version;
2141 smmu->model = data->model;
2143 parse_driver_options(smmu);
2145 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2146 if (legacy_binding && !using_generic_binding) {
2147 if (!using_legacy_binding)
2148 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2149 using_legacy_binding = true;
2150 } else if (!legacy_binding && !using_legacy_binding) {
2151 using_generic_binding = true;
2153 dev_err(dev, "not probing due to mismatched DT properties\n");
2157 if (of_dma_is_coherent(dev->of_node))
2158 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2163 static void arm_smmu_bus_init(void)
2165 /* Oh, for a proper bus abstraction */
2166 if (!iommu_present(&platform_bus_type))
2167 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2168 #ifdef CONFIG_ARM_AMBA
2169 if (!iommu_present(&amba_bustype))
2170 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2173 if (!iommu_present(&pci_bus_type)) {
2175 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2178 #ifdef CONFIG_FSL_MC_BUS
2179 if (!iommu_present(&fsl_mc_bus_type))
2180 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2184 static int arm_smmu_device_probe(struct platform_device *pdev)
2186 struct resource *res;
2187 resource_size_t ioaddr;
2188 struct arm_smmu_device *smmu;
2189 struct device *dev = &pdev->dev;
2190 int num_irqs, i, err;
2192 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2194 dev_err(dev, "failed to allocate arm_smmu_device\n");
2200 err = arm_smmu_device_dt_probe(pdev, smmu);
2202 err = arm_smmu_device_acpi_probe(pdev, smmu);
2207 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2208 ioaddr = res->start;
2209 smmu->base = devm_ioremap_resource(dev, res);
2210 if (IS_ERR(smmu->base))
2211 return PTR_ERR(smmu->base);
2213 * The resource size should effectively match the value of SMMU_TOP;
2214 * stash that temporarily until we know PAGESIZE to validate it with.
2216 smmu->numpage = resource_size(res);
2219 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2221 if (num_irqs > smmu->num_global_irqs)
2222 smmu->num_context_irqs++;
2225 if (!smmu->num_context_irqs) {
2226 dev_err(dev, "found %d interrupts but expected at least %d\n",
2227 num_irqs, smmu->num_global_irqs + 1);
2231 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2234 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2238 for (i = 0; i < num_irqs; ++i) {
2239 int irq = platform_get_irq(pdev, i);
2242 dev_err(dev, "failed to get irq index %d\n", i);
2245 smmu->irqs[i] = irq;
2248 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2250 dev_err(dev, "failed to get clocks %d\n", err);
2253 smmu->num_clks = err;
2255 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2259 err = arm_smmu_device_cfg_probe(smmu);
2263 if (smmu->version == ARM_SMMU_V2) {
2264 if (smmu->num_context_banks > smmu->num_context_irqs) {
2266 "found only %d context irq(s) but %d required\n",
2267 smmu->num_context_irqs, smmu->num_context_banks);
2271 /* Ignore superfluous interrupts */
2272 smmu->num_context_irqs = smmu->num_context_banks;
2275 for (i = 0; i < smmu->num_global_irqs; ++i) {
2276 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2277 arm_smmu_global_fault,
2279 "arm-smmu global fault",
2282 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2288 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2289 "smmu.%pa", &ioaddr);
2291 dev_err(dev, "Failed to register iommu in sysfs\n");
2295 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2296 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2298 err = iommu_device_register(&smmu->iommu);
2300 dev_err(dev, "Failed to register iommu\n");
2304 platform_set_drvdata(pdev, smmu);
2305 arm_smmu_device_reset(smmu);
2306 arm_smmu_test_smr_masks(smmu);
2309 * We want to avoid touching dev->power.lock in fastpaths unless
2310 * it's really going to do something useful - pm_runtime_enabled()
2311 * can serve as an ideal proxy for that decision. So, conditionally
2312 * enable pm_runtime.
2314 if (dev->pm_domain) {
2315 pm_runtime_set_active(dev);
2316 pm_runtime_enable(dev);
2320 * For ACPI and generic DT bindings, an SMMU will be probed before
2321 * any device which might need it, so we want the bus ops in place
2322 * ready to handle default domain setup as soon as any SMMU exists.
2324 if (!using_legacy_binding)
2325 arm_smmu_bus_init();
2331 * With the legacy DT binding in play, though, we have no guarantees about
2332 * probe order, but then we're also not doing default domains, so we can
2333 * delay setting bus ops until we're sure every possible SMMU is ready,
2334 * and that way ensure that no add_device() calls get missed.
2336 static int arm_smmu_legacy_bus_init(void)
2338 if (using_legacy_binding)
2339 arm_smmu_bus_init();
2342 device_initcall_sync(arm_smmu_legacy_bus_init);
2344 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2346 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2351 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2352 dev_err(&pdev->dev, "removing device with active domains!\n");
2354 arm_smmu_rpm_get(smmu);
2355 /* Turn the thing off */
2356 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2357 arm_smmu_rpm_put(smmu);
2359 if (pm_runtime_enabled(smmu->dev))
2360 pm_runtime_force_suspend(smmu->dev);
2362 clk_bulk_disable(smmu->num_clks, smmu->clks);
2364 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2367 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2369 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2372 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2376 arm_smmu_device_reset(smmu);
2381 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2383 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2385 clk_bulk_disable(smmu->num_clks, smmu->clks);
2390 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2392 if (pm_runtime_suspended(dev))
2395 return arm_smmu_runtime_resume(dev);
2398 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2400 if (pm_runtime_suspended(dev))
2403 return arm_smmu_runtime_suspend(dev);
2406 static const struct dev_pm_ops arm_smmu_pm_ops = {
2407 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2408 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2409 arm_smmu_runtime_resume, NULL)
2412 static struct platform_driver arm_smmu_driver = {
2415 .of_match_table = of_match_ptr(arm_smmu_of_match),
2416 .pm = &arm_smmu_pm_ops,
2417 .suppress_bind_attrs = true,
2419 .probe = arm_smmu_device_probe,
2420 .shutdown = arm_smmu_device_shutdown,
2422 builtin_platform_driver(arm_smmu_driver);