// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu-regs.h"
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1

#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10
/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif

/* Translation context bank */
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
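
/*
 * This fixed IOVA window is reserved for mapping MSI doorbells: it is
 * reported to the IOMMU core as a software MSI region by
 * arm_smmu_get_resv_regions() below.
 */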
static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
	QCOM_SMMUV2,
};

struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
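
/*
 * The accessors above recover the per-master data hung off a device's
 * iommu_fwspec: __fwspec_cfg() returns the master configuration, and
 * for_each_cfg_sme() walks the master's stream map entry indices,
 * yielding INVALID_SMENDX for IDs that have not yet been allocated an
 * SMR/S2CR pair.
 */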
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;
	struct clk_bulk_data		*clks;
	int				num_clks;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
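
/*
 * Lock-free allocator for the context bank bitmap: scan for a clear bit
 * and claim it with test_and_set_bit(), retrying if another CPU races us
 * to the same index.
 */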
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(QCOM_DUMMY_VAL, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	/*
	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
	 * cleared by the current CPU are visible to the SMMU before the TLBI.
	 */
	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~12UL;
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
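
/*
 * Three invalidation flavours: stage-1 and SMMUv2 stage-2 contexts can
 * issue and sync invalidations on their own context bank, whereas SMMUv1
 * stage-2 contexts only have TLBIVMID and must sync via the global
 * register space (see the MMU-401 note above).
 */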
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TTBCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;
	void __iomem *cb_base, *gr1_base;

	cb_base = ARM_SMMU_CB(smmu, idx);

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		return;
	}

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= cfg->vmid << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= cfg->vmid << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));

	/*
	 * TTBCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
	writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	} else {
		writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		if (stage1)
			writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 * S1               N                S1
	 * S1               S1+S2            S1
	 * S1               S2               S2
	 * S1               S1               S1
	 * N                N                N
	 * N                S1+S2            S2
	 * N                S2               S2
	 * N                S1               S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}

/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = smmu->streamid_mask << SMR_ID_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = smr >> SMR_ID_SHIFT;

	smr = smmu->streamid_mask << SMR_MASK_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
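		/*
		 * For example: an existing entry {id 0x400, mask 0xff}
		 * matches every stream ID that a new {id 0x420, mask 0x0f}
		 * entry could match, so the new entry simply reuses the
		 * existing index.
		 */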
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}

static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}
static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}

static int arm_smmu_match_node(struct device *dev, const void *data)
{
	return dev->fwnode == data;
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
						fwnode, arm_smmu_match_node);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);

	arm_smmu_rpm_put(smmu);

	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}

static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= (u16)args->args[0];

	if (args->args_count > 1)
		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= (u16)mask << SMR_MASK_SHIFT;

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
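
/*
 * A hypothetical consumer node illustrating the cells decoded above
 * (stream ID in cell 0, optional SMR mask in cell 1):
 *
 *	dma@80000000 {
 *		...
 *		iommus = <&smmu 0x400 0xf>;
 *	};
 */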
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		void __iomem *cb_base = ARM_SMMU_CB(smmu, i);

		arm_smmu_write_context_bank(smmu, i);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
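
/*
 * Map the 3-bit address size fields of the ID registers to address widths,
 * following the usual Armv8 PARange encoding: 0 -> 32 bits, 1 -> 36,
 * 2 -> 40, 3 -> 42, 4 -> 44, anything else -> 48.
 */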
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size <<= smmu->pgshift;
	if (smmu->cb_base != gr0_base + size)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
			size * 2, (smmu->cb_base - gr0_base) * 2);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
	}
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);
	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->cb_base = smmu->base + resource_size(res) / 2;

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
				"found only %d context irq(s) but %d required\n",
				smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}
/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.shutdown = arm_smmu_device_shutdown,
};
builtin_platform_driver(arm_smmu_driver);