// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu-regs.h"
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1

#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))
/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif
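/*
 * Illustrative use (taken from arm_smmu_iova_to_phys_hard() below): a
 * 64-bit VA is fed to the ATS1PR register in one store where possible,
 * degrading to a 32-bit store of the low word on 32-bit hosts:
 *
 *	smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
 */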
/* Translation context bank */
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))
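/*
 * Worked example of the register layout implied by the macros above,
 * assuming a 4K SMMU page (pgshift = 12): GR0 sits at base + 0x0, GR1 at
 * base + 0x1000, and context bank n at cb_base + (n << 12), where cb_base
 * is the upper half of the mapped region (see arm_smmu_device_probe()).
 */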
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
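/*
 * These define the fixed software MSI region reported via
 * arm_smmu_get_resv_regions() below: IOVAs 0x08000000 - 0x080fffff (1MB)
 * are reserved for mapping MSI doorbells.
 */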
static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
	QCOM_SMMUV2,
};

struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
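/*
 * Example usage (as in arm_smmu_master_free_smes() below): walk a
 * device's stream map entries, with 'i' indexing fwspec->ids[] and 'idx'
 * the matching SMR/S2CR index, or INVALID_SMENDX if unset:
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (arm_smmu_free_sme(smmu, idx))
 *			arm_smmu_write_sme(smmu, idx);
 *		cfg->smendx[i] = INVALID_SMENDX;
 *	}
 */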
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;
	struct clk_bulk_data		*clks;
	int				num_clks;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	enum arm_smmu_cbar_type		cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};
struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}
static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}
static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
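/*
 * Example of the legacy binding consumed above (illustrative node names):
 * the SMMU node carries a "mmu-masters" list of <phandle, Stream IDs>
 * entries, with the ID count given by "#stream-id-cells" in each master:
 *
 *	gpu: gpu@fd000000 {
 *		#stream-id-cells = <1>;
 *	};
 *
 *	smmu@fd800000 {
 *		mmu-masters = <&gpu 0x100>;
 *	};
 */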
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(QCOM_DUMMY_VAL, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	/*
	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
	 * cleared by the current CPU are visible to the SMMU before the TLBI.
	 */
	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova = (iova >> 12) << 12;
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
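/*
 * Sketch of how io-pgtable drives the hooks above (per the
 * iommu_gather_ops contract): an unmap first clears PTEs, then queues
 * invalidations, and only synchronises on the trailing sync, e.g. for a
 * stage 1 domain:
 *
 *	ops->unmap()
 *	    -> arm_smmu_tlb_inv_range_nosync()	(.tlb_add_flush)
 *	arm_smmu_iotlb_sync()
 *	    -> arm_smmu_tlb_sync_context()	(.tlb_sync)
 */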
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;
	void __iomem *cb_base, *gr1_base;

	cb_base = ARM_SMMU_CB(smmu, idx);

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		return;
	}

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_VA64;
		else
			reg = 0;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
	}

	/* CBAR */
	reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
	if (smmu->version < ARM_SMMU_V2)
		reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
			FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));

	/*
	 * TCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TCR2);
	writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TCR);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	} else {
		writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		if (stage1)
			writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}
	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
798 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
799 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
800 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
801 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
804 * Choosing a suitable context format is even more fiddly. Until we
805 * grow some way for the caller to express a preference, and/or move
806 * the decision into the io-pgtable code where it arguably belongs,
807 * just aim for the closest thing to the rest of the system, and hope
808 * that the hardware isn't esoteric enough that we can't assume AArch64
809 * support to be a superset of AArch32 support...
811 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
812 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
813 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
814 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
815 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
816 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
817 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
818 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
819 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
820 ARM_SMMU_FEAT_FMT_AARCH64_16K |
821 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
822 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
824 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
829 switch (smmu_domain->stage) {
830 case ARM_SMMU_DOMAIN_S1:
831 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
832 start = smmu->num_s2_context_banks;
834 oas = smmu->ipa_size;
835 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
836 fmt = ARM_64_LPAE_S1;
837 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
838 fmt = ARM_32_LPAE_S1;
839 ias = min(ias, 32UL);
840 oas = min(oas, 40UL);
843 ias = min(ias, 32UL);
844 oas = min(oas, 32UL);
846 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
848 case ARM_SMMU_DOMAIN_NESTED:
850 * We will likely want to change this if/when KVM gets
853 case ARM_SMMU_DOMAIN_S2:
854 cfg->cbar = CBAR_TYPE_S2_TRANS;
856 ias = smmu->ipa_size;
858 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
859 fmt = ARM_64_LPAE_S2;
861 fmt = ARM_32_LPAE_S2;
862 ias = min(ias, 40UL);
863 oas = min(oas, 40UL);
865 if (smmu->version == ARM_SMMU_V2)
866 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
868 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
874 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
875 smmu->num_context_banks);
880 if (smmu->version < ARM_SMMU_V2) {
881 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
882 cfg->irptndx %= smmu->num_context_irqs;
884 cfg->irptndx = cfg->cbndx;
887 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
888 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
890 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
		  FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
		  FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = FIELD_GET(SMR_ID, smr);

	smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
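/*
 * Worked example of the matching rules above: take an existing valid SMR
 * {id = 0x400, mask = 0xff}, i.e. streams 0x400-0x4ff. A new entry
 * {id = 0x410, mask = 0x0f} is entirely contained (0x0f & 0xff == 0x0f,
 * and the IDs agree outside the existing mask), so that index is reused.
 * A new entry {id = 0x480, mask = 0x1ff} overlaps only partially
 * ((0x480 ^ 0x400) & ~(0xff | 0x1ff) == 0 but the mask isn't contained),
 * so -EINVAL is returned rather than risking a conflicting match.
 */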
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size);
	arm_smmu_rpm_put(smmu);

	return ret;
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static int arm_smmu_match_node(struct device *dev, const void *data)
{
	return dev->fwnode == data;
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
						fwnode, arm_smmu_match_node);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);

	arm_smmu_rpm_put(smmu);

	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= FIELD_PREP(SMR_ID, args->args[0]);

	if (args->args_count > 1)
		fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= FIELD_PREP(SMR_MASK, mask);

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = FIELD_GET(ID7_MAJOR, reg);
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		void __iomem *cb_base = ARM_SMMU_CB(smmu, i);

		arm_smmu_write_context_bank(smmu, i);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}
1842 if (id & ID0_S2TS) {
1843 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1844 dev_notice(smmu->dev, "\tstage 2 translation\n");
1848 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1849 dev_notice(smmu->dev, "\tnested translation\n");
1852 if (!(smmu->features &
1853 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1854 dev_err(smmu->dev, "\tno translation support!\n");
1858 if ((id & ID0_S1TS) &&
1859 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1860 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1861 dev_notice(smmu->dev, "\taddress translation ops\n");
	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");
	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << FIELD_GET(ID0_NUMSIDB, id);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = FIELD_GET(ID0_NUMSMRG, id);
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);
1916 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1917 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1918 if (!(id & ID0_PTFS_NO_AARCH32S))
1919 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1923 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1924 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1926 /* Check for size mismatch of SMMU address space from mapped region */
1927 size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1928 size <<= smmu->pgshift;
1929 if (smmu->cb_base != gr0_base + size)
1931 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1932 size * 2, (smmu->cb_base - gr0_base) * 2);
1934 smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1935 smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1936 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1937 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1940 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1941 smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
	}
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;
	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");
1980 if (smmu->version < ARM_SMMU_V2) {
1981 smmu->va_size = smmu->ipa_size;
1982 if (smmu->version == ARM_SMMU_V1_64K)
1983 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1985 size = FIELD_GET(ID2_UBS, id);
1986 smmu->va_size = arm_smmu_id_size_to_bits(size);
1987 if (id & ID2_PTFS_4K)
1988 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1989 if (id & ID2_PTFS_16K)
1990 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1991 if (id & ID2_PTFS_64K)
1992 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->cb_base = smmu->base + resource_size(res) / 2;
	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;
2250 if (smmu->version == ARM_SMMU_V2) {
2251 if (smmu->num_context_banks > smmu->num_context_irqs) {
2253 "found only %d context irq(s) but %d required\n",
2254 smmu->num_context_irqs, smmu->num_context_banks);
2258 /* Ignore superfluous interrupts */
2259 smmu->num_context_irqs = smmu->num_context_banks;
2262 for (i = 0; i < smmu->num_global_irqs; ++i) {
2263 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2264 arm_smmu_global_fault,
2266 "arm-smmu global fault",
2269 dev_err(dev, "failed to request global IRQ %d (%u)\n",
	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);
	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}
/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.shutdown = arm_smmu_device_shutdown,
};
builtin_platform_driver(arm_smmu_driver);