1 // SPDX-License-Identifier: GPL-2.0-only
3 * IOMMU API for ARM architected SMMU implementations.
5 * Copyright (C) 2013 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver currently supports:
10 * - SMMUv1 and v2 implementations
11 * - Stream-matching and stream-indexing
12 * - v7/v8 long-descriptor format
13 * - Non-secure access to the SMMU
14 * - Context fault reporting
15 * - Extended Stream ID (16 bit)
18 #define pr_fmt(fmt) "arm-smmu: " fmt
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/atomic.h>
23 #include <linux/bitfield.h>
24 #include <linux/delay.h>
25 #include <linux/dma-iommu.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/err.h>
28 #include <linux/interrupt.h>
30 #include <linux/io-64-nonatomic-hi-lo.h>
31 #include <linux/io-pgtable.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/init.h>
35 #include <linux/moduleparam.h>
37 #include <linux/of_address.h>
38 #include <linux/of_device.h>
39 #include <linux/of_iommu.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
42 #include <linux/pm_runtime.h>
43 #include <linux/slab.h>
44 #include <linux/spinlock.h>
46 #include <linux/amba/bus.h>
47 #include <linux/fsl/mc.h>
49 #include "arm-smmu-regs.h"
52 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
53 * global register space are still, in fact, using a hypervisor to mediate it
54 * by trapping and emulating register accesses. Sadly, some deployed versions
55 * of said trapping code have bugs wherein they go horribly wrong for stores
56 * using r31 (i.e. XZR/WZR) as the source register.
58 #define QCOM_DUMMY_VAL -1
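/*
 * Why a dummy value rather than zero: on arm64, writel_relaxed(0, reg)
 * typically compiles to "str wzr, [reg]", which the buggy trapping code
 * described above mishandles. Writing the (ignored) value -1 forces a
 * general-purpose source register instead. Sketch of the difference
 * (assumed codegen, illustrative only):
 *
 *	writel_relaxed(0, sync_reg);              // str wzr, [x0] - write may be lost
 *	writel_relaxed(QCOM_DUMMY_VAL, sync_reg); // mov w1, #-1; str w1, [x0] - OK
 */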
60 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
62 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
63 #define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
64 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
66 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
67 #define TLB_SPIN_COUNT 10
69 /* Maximum number of context banks per SMMU */
70 #define ARM_SMMU_MAX_CBS 128
72 #define MSI_IOVA_BASE 0x8000000
73 #define MSI_IOVA_LENGTH 0x100000
75 static int force_stage;
77 * not really modular, but the easiest way to keep compat with existing
78 * bootargs behaviour is to continue using module_param() here.
80 module_param(force_stage, int, S_IRUGO);
81 MODULE_PARM_DESC(force_stage,
82 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
83 static bool disable_bypass =
84 IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
85 module_param(disable_bypass, bool, S_IRUGO);
86 MODULE_PARM_DESC(disable_bypass,
87 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
89 enum arm_smmu_arch_version {
90 ARM_SMMU_V1,
91 ARM_SMMU_V1_64K,
92 ARM_SMMU_V2,
93 };
95 enum arm_smmu_implementation {
96 GENERIC_SMMU,
97 ARM_MMU500,
98 CAVIUM_SMMUV2,
99 QCOM_SMMUV2,
100 };
102 struct arm_smmu_s2cr {
103 struct iommu_group *group;
104 int count;
105 enum arm_smmu_s2cr_type type;
106 enum arm_smmu_s2cr_privcfg privcfg;
107 u8 cbndx;
108 };
110 #define s2cr_init_val (struct arm_smmu_s2cr){ \
111 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
114 struct arm_smmu_smr {
115 u16 mask;
116 u16 id;
117 bool valid;
118 };
120 struct arm_smmu_cb {
121 u64 ttbr[2];
122 u32 tcr[2];
123 u32 mair[2];
124 struct arm_smmu_cfg *cfg;
125 };
127 struct arm_smmu_master_cfg {
128 struct arm_smmu_device *smmu;
129 s16 smendx[];
130 };
131 #define INVALID_SMENDX -1
132 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
133 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
134 #define fwspec_smendx(fw, i) \
135 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
136 #define for_each_cfg_sme(fw, i, idx) \
137 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
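/*
 * Illustrative use of the iterator above (a sketch, not driver code;
 * "dump_dev_smes" is a hypothetical helper): it visits every stream ID
 * the device carries along with the stream map entry it resolved to.
 */
#if 0
static void dump_dev_smes(struct iommu_fwspec *fwspec)
{
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (idx == INVALID_SMENDX)
			continue;	/* no SME allocated for this ID yet */
		pr_info("stream ID 0x%x -> SME %d\n", fwspec->ids[i], idx);
	}
}
#endif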
139 struct arm_smmu_device {
140 struct device *dev;
142 void __iomem *base;
143 unsigned int numpage;
144 unsigned int pgshift;
146 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
147 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
148 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
149 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
150 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
151 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
152 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
153 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
154 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
155 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
156 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
157 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
158 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
159 u32 features;
161 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
162 u32 options;
163 enum arm_smmu_arch_version version;
164 enum arm_smmu_implementation model;
166 u32 num_context_banks;
167 u32 num_s2_context_banks;
168 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
169 struct arm_smmu_cb *cbs;
170 atomic_t irptndx;
172 u32 num_mapping_groups;
173 u16 streamid_mask;
174 u16 smr_mask_mask;
175 struct arm_smmu_smr *smrs;
176 struct arm_smmu_s2cr *s2crs;
177 struct mutex stream_map_mutex;
179 unsigned long va_size;
180 unsigned long ipa_size;
181 unsigned long pa_size;
182 unsigned long pgsize_bitmap;
184 u32 num_global_irqs;
185 u32 num_context_irqs;
186 unsigned int *irqs;
187 struct clk_bulk_data *clks;
188 int num_clks;
190 u32 cavium_id_base; /* Specific to Cavium */
192 spinlock_t global_sync_lock;
194 /* IOMMU core code handle */
195 struct iommu_device iommu;
198 enum arm_smmu_context_fmt {
199 ARM_SMMU_CTX_FMT_NONE,
200 ARM_SMMU_CTX_FMT_AARCH64,
201 ARM_SMMU_CTX_FMT_AARCH32_L,
202 ARM_SMMU_CTX_FMT_AARCH32_S,
205 struct arm_smmu_cfg {
206 u8 cbndx;
207 u8 irptndx;
208 union {
209 u16 asid;
210 u16 vmid;
211 };
212 enum arm_smmu_cbar_type cbar;
213 enum arm_smmu_context_fmt fmt;
215 #define INVALID_IRPTNDX 0xff
217 enum arm_smmu_domain_stage {
218 ARM_SMMU_DOMAIN_S1 = 0,
219 ARM_SMMU_DOMAIN_S2,
220 ARM_SMMU_DOMAIN_NESTED,
221 ARM_SMMU_DOMAIN_BYPASS,
224 struct arm_smmu_domain {
225 struct arm_smmu_device *smmu;
226 struct io_pgtable_ops *pgtbl_ops;
227 const struct iommu_gather_ops *tlb_ops;
228 struct arm_smmu_cfg cfg;
229 enum arm_smmu_domain_stage stage;
230 bool non_strict;
231 struct mutex init_mutex; /* Protects smmu pointer */
232 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
233 struct iommu_domain domain;
236 static int arm_smmu_gr0_ns(int offset)
237 {
238 switch (offset) {
239 case ARM_SMMU_GR0_sCR0:
240 case ARM_SMMU_GR0_sACR:
241 case ARM_SMMU_GR0_sGFSR:
242 case ARM_SMMU_GR0_sGFSYNR0:
243 case ARM_SMMU_GR0_sGFSYNR1:
244 case ARM_SMMU_GR0_sGFSYNR2:
245 return offset + 0x400;
246 default:
247 return offset;
248 }
249 }
251 static void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
253 return smmu->base + (n << smmu->pgshift);
256 static u32 arm_smmu_readl(struct arm_smmu_device *smmu, int page, int offset)
258 if ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) && page == 0)
259 offset = arm_smmu_gr0_ns(offset);
261 return readl_relaxed(arm_smmu_page(smmu, page) + offset);
264 static void arm_smmu_writel(struct arm_smmu_device *smmu, int page, int offset,
265 u32 val)
266 {
267 if ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) && page == 0)
268 offset = arm_smmu_gr0_ns(offset);
270 writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
273 static u64 arm_smmu_readq(struct arm_smmu_device *smmu, int page, int offset)
275 return readq_relaxed(arm_smmu_page(smmu, page) + offset);
278 static void arm_smmu_writeq(struct arm_smmu_device *smmu, int page, int offset,
279 u64 val)
280 {
281 writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
284 #define ARM_SMMU_GR0 0
285 #define ARM_SMMU_GR1 1
286 #define ARM_SMMU_CB(s, n) ((s)->numpage + (n))
288 #define arm_smmu_gr0_read(s, o) \
289 arm_smmu_readl((s), ARM_SMMU_GR0, (o))
290 #define arm_smmu_gr0_write(s, o, v) \
291 arm_smmu_writel((s), ARM_SMMU_GR0, (o), (v))
293 #define arm_smmu_gr1_read(s, o) \
294 arm_smmu_readl((s), ARM_SMMU_GR1, (o))
295 #define arm_smmu_gr1_write(s, o, v) \
296 arm_smmu_writel((s), ARM_SMMU_GR1, (o), (v))
298 #define arm_smmu_cb_read(s, n, o) \
299 arm_smmu_readl((s), ARM_SMMU_CB((s), (n)), (o))
300 #define arm_smmu_cb_write(s, n, o, v) \
301 arm_smmu_writel((s), ARM_SMMU_CB((s), (n)), (o), (v))
302 #define arm_smmu_cb_readq(s, n, o) \
303 arm_smmu_readq((s), ARM_SMMU_CB((s), (n)), (o))
304 #define arm_smmu_cb_writeq(s, n, o, v) \
305 arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
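/*
 * A worked example of the paging arithmetic behind these accessors
 * (illustrative numbers): with pgshift = 12 and numpage = 64, GR0 sits
 * at base + 0, GR1 at base + 0x1000, and context bank n at
 * base + ((64 + n) << 12), i.e. the context banks occupy the upper half
 * of a 512KB register region.
 */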
307 struct arm_smmu_option_prop {
308 u32 opt;
309 const char *prop;
310 };
312 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
314 static bool using_legacy_binding, using_generic_binding;
316 static struct arm_smmu_option_prop arm_smmu_options[] = {
317 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
318 { 0, NULL},
319 };
321 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
323 if (pm_runtime_enabled(smmu->dev))
324 return pm_runtime_get_sync(smmu->dev);
326 return 0;
329 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
331 if (pm_runtime_enabled(smmu->dev))
332 pm_runtime_put(smmu->dev);
335 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
337 return container_of(dom, struct arm_smmu_domain, domain);
340 static void parse_driver_options(struct arm_smmu_device *smmu)
342 int i = 0;
344 do {
345 if (of_property_read_bool(smmu->dev->of_node,
346 arm_smmu_options[i].prop)) {
347 smmu->options |= arm_smmu_options[i].opt;
348 dev_notice(smmu->dev, "option %s\n",
349 arm_smmu_options[i].prop);
351 } while (arm_smmu_options[++i].opt);
354 static struct device_node *dev_get_dev_node(struct device *dev)
356 if (dev_is_pci(dev)) {
357 struct pci_bus *bus = to_pci_dev(dev)->bus;
359 while (!pci_is_root_bus(bus))
360 bus = bus->parent;
361 return of_node_get(bus->bridge->parent->of_node);
364 return of_node_get(dev->of_node);
367 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
369 *((__be32 *)data) = cpu_to_be32(alias);
370 return 0; /* Continue walking */
373 static int __find_legacy_master_phandle(struct device *dev, void *data)
375 struct of_phandle_iterator *it = *(void **)data;
376 struct device_node *np = it->node;
379 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
380 "#stream-id-cells", 0)
381 if (it->node == np) {
382 *(void **)data = dev;
383 return 1;
384 }
385 it->node = np;
386 return err == -ENOENT ? 0 : err;
389 static struct platform_driver arm_smmu_driver;
390 static struct iommu_ops arm_smmu_ops;
392 static int arm_smmu_register_legacy_master(struct device *dev,
393 struct arm_smmu_device **smmu)
395 struct device *smmu_dev;
396 struct device_node *np;
397 struct of_phandle_iterator it;
398 void *data = &it;
399 u32 *sids;
400 __be32 pci_sid;
401 int err;
403 np = dev_get_dev_node(dev);
404 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
405 of_node_put(np);
406 return -ENODEV;
407 }
409 it.node = np;
410 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
411 __find_legacy_master_phandle);
412 smmu_dev = data;
413 of_node_put(np);
414 if (err == 0)
415 return -ENODEV;
416 if (err < 0)
417 return err;
419 if (dev_is_pci(dev)) {
420 /* "mmu-masters" assumes Stream ID == Requester ID */
421 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
422 &pci_sid);
423 it.cur = &pci_sid;
424 it.cur_count = 1;
425 }
427 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
428 &arm_smmu_ops);
429 if (err)
430 return err;
432 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
433 if (!sids)
434 return -ENOMEM;
436 *smmu = dev_get_drvdata(smmu_dev);
437 of_phandle_iterator_args(&it, sids, it.cur_count);
438 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
440 kfree(sids);
441 return err;
443 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
447 do {
448 idx = find_next_zero_bit(map, end, start);
449 if (idx == end)
450 return -ENOSPC;
451 } while (test_and_set_bit(idx, map));
453 return idx;
456 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
457 {
458 clear_bit(idx, map);
459 }
461 /* Wait for any pending TLB invalidations to complete */
462 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
463 int sync, int status)
465 unsigned int spin_cnt, delay;
468 arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
469 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
470 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
471 reg = arm_smmu_readl(smmu, page, status);
472 if (!(reg & sTLBGSTATUS_GSACTIVE))
473 return;
474 cpu_relax();
475 }
476 udelay(delay);
477 }
478 dev_err_ratelimited(smmu->dev,
479 "TLB sync timed out -- SMMU may be deadlocked\n");
482 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
486 spin_lock_irqsave(&smmu->global_sync_lock, flags);
487 __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
488 ARM_SMMU_GR0_sTLBGSTATUS);
489 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
492 static void arm_smmu_tlb_sync_context(void *cookie)
494 struct arm_smmu_domain *smmu_domain = cookie;
495 struct arm_smmu_device *smmu = smmu_domain->smmu;
498 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
499 __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
500 ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
501 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
504 static void arm_smmu_tlb_sync_vmid(void *cookie)
506 struct arm_smmu_domain *smmu_domain = cookie;
508 arm_smmu_tlb_sync_global(smmu_domain->smmu);
511 static void arm_smmu_tlb_inv_context_s1(void *cookie)
513 struct arm_smmu_domain *smmu_domain = cookie;
515 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
516 * current CPU are visible beforehand.
517 */
518 wmb();
519 arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
520 ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
521 arm_smmu_tlb_sync_context(cookie);
524 static void arm_smmu_tlb_inv_context_s2(void *cookie)
526 struct arm_smmu_domain *smmu_domain = cookie;
527 struct arm_smmu_device *smmu = smmu_domain->smmu;
529 /* See above */
530 wmb();
531 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
532 arm_smmu_tlb_sync_global(smmu);
535 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
536 size_t granule, bool leaf, void *cookie)
538 struct arm_smmu_domain *smmu_domain = cookie;
539 struct arm_smmu_device *smmu = smmu_domain->smmu;
540 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
541 int reg, idx = cfg->cbndx;
543 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
544 wmb();
546 reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
548 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
549 iova = (iova >> 12) << 12;
550 iova |= cfg->asid;
551 do {
552 arm_smmu_cb_write(smmu, idx, reg, iova);
553 iova += granule;
554 } while (size -= granule);
555 } else {
556 iova >>= 12;
557 iova |= (u64)cfg->asid << 48;
558 do {
559 arm_smmu_cb_writeq(smmu, idx, reg, iova);
560 iova += granule >> 12;
561 } while (size -= granule);
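/*
 * Worked example of the AArch64 encoding above (illustrative values):
 * invalidating iova = 0x12345000 in ASID 5 writes
 * (0x12345000 >> 12) | (5ULL << 48) = 0x0005000000012345 to TLBIVA(L),
 * i.e. VA[48:12] in the low bits and the ASID in bits [63:48].
 */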
565 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
566 size_t granule, bool leaf, void *cookie)
568 struct arm_smmu_domain *smmu_domain = cookie;
569 struct arm_smmu_device *smmu = smmu_domain->smmu;
570 int reg, idx = smmu_domain->cfg.cbndx;
572 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
573 wmb();
575 reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
576 iova >>= 12;
577 do {
578 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
579 arm_smmu_cb_writeq(smmu, idx, reg, iova);
580 else
581 arm_smmu_cb_write(smmu, idx, reg, iova);
582 iova += granule >> 12;
583 } while (size -= granule);
587 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
588 * almost negligible, but the benefit of getting the first one in as far ahead
589 * of the sync as possible is significant, hence we don't just make this a
590 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
592 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
593 size_t granule, bool leaf, void *cookie)
595 struct arm_smmu_domain *smmu_domain = cookie;
596 struct arm_smmu_device *smmu = smmu_domain->smmu;
598 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
599 wmb();
601 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
604 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
605 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
606 .tlb_add_flush = arm_smmu_tlb_inv_range_s1,
607 .tlb_sync = arm_smmu_tlb_sync_context,
610 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
611 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
612 .tlb_add_flush = arm_smmu_tlb_inv_range_s2,
613 .tlb_sync = arm_smmu_tlb_sync_context,
616 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
617 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
618 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
619 .tlb_sync = arm_smmu_tlb_sync_vmid,
622 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
624 u32 fsr, fsynr, cbfrsynra;
626 struct iommu_domain *domain = dev;
627 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
628 struct arm_smmu_device *smmu = smmu_domain->smmu;
629 int idx = smmu_domain->cfg.cbndx;
631 fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
632 if (!(fsr & FSR_FAULT))
633 return IRQ_NONE;
635 fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
636 iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
637 cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
639 dev_err_ratelimited(smmu->dev,
640 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
641 fsr, iova, fsynr, cbfrsynra, idx);
643 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
645 return IRQ_HANDLED;
647 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
649 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
650 struct arm_smmu_device *smmu = dev;
652 gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
653 gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
654 gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
655 gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
657 if (!gfsr && !gfsynr0 && !gfsynr1 && !gfsynr2)
658 return IRQ_NONE;
660 dev_err_ratelimited(smmu->dev,
661 "Unexpected global fault, this could be serious\n");
662 dev_err_ratelimited(smmu->dev,
663 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
664 gfsr, gfsynr0, gfsynr1, gfsynr2);
666 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
667 return IRQ_HANDLED;
670 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
671 struct io_pgtable_cfg *pgtbl_cfg)
673 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
674 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
675 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
677 cb->cfg = cfg;
681 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
682 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
684 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
685 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
686 cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
687 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
688 cb->tcr[1] |= TCR2_AS;
691 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
696 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
697 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
698 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
700 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
701 cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
702 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
703 cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
706 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
709 /* MAIRs (stage-1 only) */
711 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
712 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
713 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
715 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
716 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
721 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
723 u32 reg;
724 bool stage1;
725 struct arm_smmu_cb *cb = &smmu->cbs[idx];
726 struct arm_smmu_cfg *cfg = cb->cfg;
728 /* Unassigned context banks only need disabling */
729 if (!cfg) {
730 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
731 return;
732 }
734 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
737 if (smmu->version > ARM_SMMU_V1) {
738 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
739 reg = CBA2R_VA64;
740 else
741 reg = 0;
742 /* 16-bit VMIDs live in CBA2R */
743 if (smmu->features & ARM_SMMU_FEAT_VMID16)
744 reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
746 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
750 reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
751 if (smmu->version < ARM_SMMU_V2)
752 reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
755 * Use the weakest shareability/memory types, so they are
756 * overridden by the ttbcr/pte.
757 */
758 if (stage1) {
759 reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
760 FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
761 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
762 /* 8-bit VMIDs live in CBAR */
763 reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
765 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
769 * We must write this before the TTBRs, since it determines the
770 * access behaviour of some fields (in particular, ASID[15:8]).
772 if (stage1 && smmu->version > ARM_SMMU_V1)
773 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
774 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
777 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
778 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
779 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
780 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
781 } else {
782 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
783 if (stage1)
784 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
785 cb->ttbr[1]);
786 }
788 /* MAIRs (stage-1 only) */
789 if (stage1) {
790 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
791 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
795 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
796 if (stage1)
797 reg |= SCTLR_S1_ASIDPNE;
798 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
799 reg |= SCTLR_E;
801 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
804 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
805 struct arm_smmu_device *smmu)
807 int irq, start, ret = 0;
808 unsigned long ias, oas;
809 struct io_pgtable_ops *pgtbl_ops;
810 struct io_pgtable_cfg pgtbl_cfg;
811 enum io_pgtable_fmt fmt;
812 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
813 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
815 mutex_lock(&smmu_domain->init_mutex);
816 if (smmu_domain->smmu)
817 goto out_unlock;
819 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
820 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
821 smmu_domain->smmu = smmu;
822 goto out_unlock;
823 }
826 * Mapping the requested stage onto what we support is surprisingly
827 * complicated, mainly because the spec allows S1+S2 SMMUs without
828 * support for nested translation. That means we end up with the
831 * Requested Supported Actual
832 * S1 N S1
833 * S1 S1+S2 S1
834 * S1 S2 S2
835 * S1 S1 S1
836 * N N N
837 * N S1+S2 S2
838 * N S2 S2
839 * N S1 S1
840 *
841 * Note that you can't actually request stage-2 mappings.
843 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
844 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
845 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
846 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
849 * Choosing a suitable context format is even more fiddly. Until we
850 * grow some way for the caller to express a preference, and/or move
851 * the decision into the io-pgtable code where it arguably belongs,
852 * just aim for the closest thing to the rest of the system, and hope
853 * that the hardware isn't esoteric enough that we can't assume AArch64
854 * support to be a superset of AArch32 support...
856 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
857 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
858 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
859 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
860 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
861 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
862 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
863 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
864 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
865 ARM_SMMU_FEAT_FMT_AARCH64_16K |
866 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
867 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
869 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
870 ret = -EINVAL;
871 goto out_unlock;
872 }
874 switch (smmu_domain->stage) {
875 case ARM_SMMU_DOMAIN_S1:
876 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
877 start = smmu->num_s2_context_banks;
878 ias = smmu->va_size;
879 oas = smmu->ipa_size;
880 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
881 fmt = ARM_64_LPAE_S1;
882 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
883 fmt = ARM_32_LPAE_S1;
884 ias = min(ias, 32UL);
885 oas = min(oas, 40UL);
886 } else {
887 fmt = ARM_V7S;
888 ias = min(ias, 32UL);
889 oas = min(oas, 32UL);
891 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
893 case ARM_SMMU_DOMAIN_NESTED:
894 /*
895 * We will likely want to change this if/when KVM gets
896 * involved.
897 */
898 case ARM_SMMU_DOMAIN_S2:
899 cfg->cbar = CBAR_TYPE_S2_TRANS;
900 start = 0;
901 ias = smmu->ipa_size;
902 oas = smmu->pa_size;
903 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
904 fmt = ARM_64_LPAE_S2;
905 } else {
906 fmt = ARM_32_LPAE_S2;
907 ias = min(ias, 40UL);
908 oas = min(oas, 40UL);
910 if (smmu->version == ARM_SMMU_V2)
911 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
913 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
919 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
920 smmu->num_context_banks);
921 if (ret < 0)
922 goto out_unlock;
924 cfg->cbndx = ret;
925 if (smmu->version < ARM_SMMU_V2) {
926 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
927 cfg->irptndx %= smmu->num_context_irqs;
928 } else {
929 cfg->irptndx = cfg->cbndx;
932 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
933 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
934 else
935 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
937 pgtbl_cfg = (struct io_pgtable_cfg) {
938 .pgsize_bitmap = smmu->pgsize_bitmap,
939 .ias = ias,
940 .oas = oas,
941 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
942 .tlb = smmu_domain->tlb_ops,
943 .iommu_dev = smmu->dev,
946 if (smmu_domain->non_strict)
947 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
949 smmu_domain->smmu = smmu;
950 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
951 if (!pgtbl_ops) {
952 ret = -ENOMEM;
953 goto out_clear_smmu;
954 }
956 /* Update the domain's page sizes to reflect the page table format */
957 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
958 domain->geometry.aperture_end = (1UL << ias) - 1;
959 domain->geometry.force_aperture = true;
961 /* Initialise the context bank with our page table cfg */
962 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
963 arm_smmu_write_context_bank(smmu, cfg->cbndx);
966 * Request context fault interrupt. Do this last to avoid the
967 * handler seeing a half-initialised domain state.
969 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
970 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
971 IRQF_SHARED, "arm-smmu-context-fault", domain);
972 if (ret < 0) {
973 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
974 cfg->irptndx, irq);
975 cfg->irptndx = INVALID_IRPTNDX;
976 }
978 mutex_unlock(&smmu_domain->init_mutex);
980 /* Publish page table ops for map/unmap */
981 smmu_domain->pgtbl_ops = pgtbl_ops;
982 return 0;
984 out_clear_smmu:
985 smmu_domain->smmu = NULL;
986 out_unlock:
987 mutex_unlock(&smmu_domain->init_mutex);
988 return ret;
991 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
993 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
994 struct arm_smmu_device *smmu = smmu_domain->smmu;
995 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
998 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
999 return;
1001 ret = arm_smmu_rpm_get(smmu);
1002 if (ret < 0)
1003 return;
1006 * Disable the context bank and free the page tables before freeing
1009 smmu->cbs[cfg->cbndx].cfg = NULL;
1010 arm_smmu_write_context_bank(smmu, cfg->cbndx);
1012 if (cfg->irptndx != INVALID_IRPTNDX) {
1013 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
1014 devm_free_irq(smmu->dev, irq, domain);
1017 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1018 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
1020 arm_smmu_rpm_put(smmu);
1023 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1025 struct arm_smmu_domain *smmu_domain;
1027 if (type != IOMMU_DOMAIN_UNMANAGED &&
1028 type != IOMMU_DOMAIN_DMA &&
1029 type != IOMMU_DOMAIN_IDENTITY)
1030 return NULL;
1032 * Allocate the domain and initialise some of its data structures.
1033 * We can't really do anything meaningful until we've added a
1036 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1037 if (!smmu_domain)
1038 return NULL;
1040 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
1041 iommu_get_dma_cookie(&smmu_domain->domain))) {
1042 kfree(smmu_domain);
1043 return NULL;
1044 }
1046 mutex_init(&smmu_domain->init_mutex);
1047 spin_lock_init(&smmu_domain->cb_lock);
1049 return &smmu_domain->domain;
1052 static void arm_smmu_domain_free(struct iommu_domain *domain)
1054 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1057 * Free the domain resources. We assume that all devices have
1058 * already been detached.
1060 iommu_put_dma_cookie(domain);
1061 arm_smmu_destroy_domain_context(domain);
1062 kfree(smmu_domain);
1065 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1067 struct arm_smmu_smr *smr = smmu->smrs + idx;
1068 u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
1070 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1071 reg |= SMR_VALID;
1072 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
1075 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1077 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1078 u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
1079 FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
1080 FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
1082 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1083 smmu->smrs[idx].valid)
1084 reg |= S2CR_EXIDVALID;
1085 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
1088 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1090 arm_smmu_write_s2cr(smmu, idx);
1091 if (smmu->smrs)
1092 arm_smmu_write_smr(smmu, idx);
1096 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1097 * should be called after sCR0 is written.
1099 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1107 * SMR.ID bits may not be preserved if the corresponding MASK
1108 * bits are set, so check each one separately. We can reject
1109 * masters later if they try to claim IDs outside these masks.
1111 smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
1112 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
1113 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
1114 smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
1116 smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
1117 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
1118 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
1119 smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
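/*
 * The probe above relies on write-then-read-back discovery: SMR0 is
 * loaded with all-ones ID/MASK fields and whatever sticks defines the
 * usable width, since RAZ/WI bits read back as zero. For example
 * (illustrative), an implementation with 10 stream ID bits yields
 * streamid_mask = 0x3ff, and any master later claiming an ID outside
 * that mask can be rejected. Scribbling on SMR0 is presumably safe here
 * because the stream mapping was just reset with every SMR invalid.
 */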
1122 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1124 struct arm_smmu_smr *smrs = smmu->smrs;
1125 int i, free_idx = -ENOSPC;
1127 /* Stream indexing is blissfully easy */
1128 if (!smrs)
1129 return id;
1131 /* Validating SMRs is... less so */
1132 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1133 if (!smrs[i].valid) {
1135 * Note the first free entry we come across, which
1136 * we'll claim in the end if nothing else matches.
1137 */
1138 if (free_idx < 0)
1139 free_idx = i;
1140 continue;
1141 }
1143 * If the new entry is _entirely_ matched by an existing entry,
1144 * then reuse that, with the guarantee that there also cannot
1145 * be any subsequent conflicting entries. In normal use we'd
1146 * expect simply identical entries for this case, but there's
1147 * no harm in accommodating the generalisation.
1149 if ((mask & smrs[i].mask) == mask &&
1150 !((id ^ smrs[i].id) & ~smrs[i].mask))
1151 return i;
1153 * If the new entry has any other overlap with an existing one,
1154 * though, then there always exists at least one stream ID
1155 * which would cause a conflict, and we can't allow that risk.
1157 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1164 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1166 if (--smmu->s2crs[idx].count)
1167 return false;
1169 smmu->s2crs[idx] = s2cr_init_val;
1170 if (smmu->smrs)
1171 smmu->smrs[idx].valid = false;
1173 return true;
1176 static int arm_smmu_master_alloc_smes(struct device *dev)
1178 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1179 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1180 struct arm_smmu_device *smmu = cfg->smmu;
1181 struct arm_smmu_smr *smrs = smmu->smrs;
1182 struct iommu_group *group;
1185 mutex_lock(&smmu->stream_map_mutex);
1186 /* Figure out a viable stream map entry allocation */
1187 for_each_cfg_sme(fwspec, i, idx) {
1188 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1189 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1191 if (idx != INVALID_SMENDX) {
1192 ret = -EEXIST;
1193 goto out_err;
1194 }
1196 ret = arm_smmu_find_sme(smmu, sid, mask);
1197 if (ret < 0)
1198 goto out_err;
1200 idx = ret;
1201 if (smrs && smmu->s2crs[idx].count == 0) {
1202 smrs[idx].id = sid;
1203 smrs[idx].mask = mask;
1204 smrs[idx].valid = true;
1206 smmu->s2crs[idx].count++;
1207 cfg->smendx[i] = (s16)idx;
1210 group = iommu_group_get_for_dev(dev);
1211 if (!group)
1212 group = ERR_PTR(-ENOMEM);
1213 if (IS_ERR(group)) {
1214 ret = PTR_ERR(group);
1217 iommu_group_put(group);
1219 /* It worked! Now, poke the actual hardware */
1220 for_each_cfg_sme(fwspec, i, idx) {
1221 arm_smmu_write_sme(smmu, idx);
1222 smmu->s2crs[idx].group = group;
1225 mutex_unlock(&smmu->stream_map_mutex);
1230 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1231 cfg->smendx[i] = INVALID_SMENDX;
1233 mutex_unlock(&smmu->stream_map_mutex);
1237 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1239 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1240 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1243 mutex_lock(&smmu->stream_map_mutex);
1244 for_each_cfg_sme(fwspec, i, idx) {
1245 if (arm_smmu_free_sme(smmu, idx))
1246 arm_smmu_write_sme(smmu, idx);
1247 cfg->smendx[i] = INVALID_SMENDX;
1249 mutex_unlock(&smmu->stream_map_mutex);
1252 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1253 struct iommu_fwspec *fwspec)
1255 struct arm_smmu_device *smmu = smmu_domain->smmu;
1256 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1257 u8 cbndx = smmu_domain->cfg.cbndx;
1258 enum arm_smmu_s2cr_type type;
1261 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1262 type = S2CR_TYPE_BYPASS;
1263 else
1264 type = S2CR_TYPE_TRANS;
1266 for_each_cfg_sme(fwspec, i, idx) {
1267 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1268 continue;
1270 s2cr[idx].type = type;
1271 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1272 s2cr[idx].cbndx = cbndx;
1273 arm_smmu_write_s2cr(smmu, idx);
1278 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1281 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1282 struct arm_smmu_device *smmu;
1283 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1285 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1286 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1291 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1292 * domains between of_xlate() and add_device() - we have no way to cope
1293 * with that, so until ARM gets converted to rely on groups and default
1294 * domains, just say no (but more politely than by dereferencing NULL).
1295 * This should be at least a WARN_ON once that's sorted.
1297 if (!fwspec->iommu_priv)
1298 return -ENODEV;
1300 smmu = fwspec_smmu(fwspec);
1302 ret = arm_smmu_rpm_get(smmu);
1303 if (ret < 0)
1304 return ret;
1306 /* Ensure that the domain is finalised */
1307 ret = arm_smmu_init_domain_context(domain, smmu);
1308 if (ret < 0)
1309 goto rpm_put;
1312 * Sanity check the domain. We don't support domains across
1315 if (smmu_domain->smmu != smmu) {
1317 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1318 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1323 /* Looks ok, so add the device to the domain */
1324 ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1326 rpm_put:
1327 arm_smmu_rpm_put(smmu);
1328 return ret;
1331 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1332 phys_addr_t paddr, size_t size, int prot)
1334 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1335 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1336 int ret;
1338 if (!ops)
1339 return -ENODEV;
1341 arm_smmu_rpm_get(smmu);
1342 ret = ops->map(ops, iova, paddr, size, prot);
1343 arm_smmu_rpm_put(smmu);
1348 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1349 size_t size)
1350 {
1351 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1352 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1353 size_t ret;
1355 if (!ops)
1356 return 0;
1358 arm_smmu_rpm_get(smmu);
1359 ret = ops->unmap(ops, iova, size);
1360 arm_smmu_rpm_put(smmu);
1365 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1367 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1368 struct arm_smmu_device *smmu = smmu_domain->smmu;
1370 if (smmu_domain->tlb_ops) {
1371 arm_smmu_rpm_get(smmu);
1372 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1373 arm_smmu_rpm_put(smmu);
1377 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1379 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1380 struct arm_smmu_device *smmu = smmu_domain->smmu;
1382 if (smmu_domain->tlb_ops) {
1383 arm_smmu_rpm_get(smmu);
1384 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1385 arm_smmu_rpm_put(smmu);
1389 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1390 dma_addr_t iova)
1391 {
1392 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1393 struct arm_smmu_device *smmu = smmu_domain->smmu;
1394 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1395 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1396 struct device *dev = smmu->dev;
1397 void __iomem *reg;
1398 u32 tmp;
1399 u64 phys;
1400 unsigned long va, flags;
1401 int ret, idx = cfg->cbndx;
1403 ret = arm_smmu_rpm_get(smmu);
1404 if (ret < 0)
1405 return 0;
1407 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1408 va = iova & ~0xfffUL;
1409 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1410 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1412 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1414 reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1415 if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
1416 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1418 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1420 return ops->iova_to_phys(ops, iova);
1423 phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1424 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1425 if (phys & CB_PAR_F) {
1426 dev_err(dev, "translation fault!\n");
1427 dev_err(dev, "PAR = 0x%llx\n", phys);
1431 arm_smmu_rpm_put(smmu);
1433 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
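/*
 * Worked example of the PAR decode above (illustrative values): for
 * iova = 0x80021234 with PAR = 0x89432000, the result is
 * (0x89432000 & GENMASK_ULL(39, 12)) | (0x80021234 & 0xfff)
 * = 0x89432234. Note the fixed 40-bit/4K decode here is narrower than
 * what the software table walk fallback can return.
 */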
1436 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1437 dma_addr_t iova)
1438 {
1439 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1440 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1442 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1443 return iova;
1445 if (!ops)
1446 return 0;
1448 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1449 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1450 return arm_smmu_iova_to_phys_hard(domain, iova);
1452 return ops->iova_to_phys(ops, iova);
1455 static bool arm_smmu_capable(enum iommu_cap cap)
1458 case IOMMU_CAP_CACHE_COHERENCY:
1460 * Return true here as the SMMU can always send out coherent
1461 * requests.
1462 */
1463 return true;
1464 case IOMMU_CAP_NOEXEC:
1465 return true;
1466 default:
1467 return false;
1468 }
1471 static int arm_smmu_match_node(struct device *dev, const void *data)
1473 return dev->fwnode == data;
1476 static
1477 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1479 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1480 fwnode, arm_smmu_match_node);
1482 return dev ? dev_get_drvdata(dev) : NULL;
1485 static int arm_smmu_add_device(struct device *dev)
1487 struct arm_smmu_device *smmu;
1488 struct arm_smmu_master_cfg *cfg;
1489 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1492 if (using_legacy_binding) {
1493 ret = arm_smmu_register_legacy_master(dev, &smmu);
1496 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1497 * will allocate/initialise a new one. Thus we need to update fwspec for
1500 fwspec = dev_iommu_fwspec_get(dev);
1501 if (ret)
1502 goto out_free;
1503 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1504 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1505 } else {
1506 return -ENODEV;
1507 }
1509 ret = -EINVAL;
1510 for (i = 0; i < fwspec->num_ids; i++) {
1511 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1512 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1514 if (sid & ~smmu->streamid_mask) {
1515 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1516 sid, smmu->streamid_mask);
1517 goto out_free;
1518 }
1519 if (mask & ~smmu->smr_mask_mask) {
1520 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1521 mask, smmu->smr_mask_mask);
1522 goto out_free;
1523 }
1524 }
1526 ret = -ENOMEM;
1527 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1528 GFP_KERNEL);
1529 if (!cfg)
1530 goto out_free;
1533 fwspec->iommu_priv = cfg;
1534 while (i--)
1535 cfg->smendx[i] = INVALID_SMENDX;
1537 ret = arm_smmu_rpm_get(smmu);
1538 if (ret < 0)
1539 goto out_cfg_free;
1541 ret = arm_smmu_master_alloc_smes(dev);
1542 arm_smmu_rpm_put(smmu);
1544 if (ret)
1545 goto out_cfg_free;
1547 iommu_device_link(&smmu->iommu, dev);
1549 device_link_add(dev, smmu->dev,
1550 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1552 return 0;
1554 out_cfg_free:
1555 kfree(cfg);
1556 out_free:
1557 iommu_fwspec_free(dev);
1558 return ret;
1561 static void arm_smmu_remove_device(struct device *dev)
1563 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1564 struct arm_smmu_master_cfg *cfg;
1565 struct arm_smmu_device *smmu;
1568 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1569 return;
1571 cfg = fwspec->iommu_priv;
1572 smmu = cfg->smmu;
1574 ret = arm_smmu_rpm_get(smmu);
1575 if (ret < 0)
1576 return;
1578 iommu_device_unlink(&smmu->iommu, dev);
1579 arm_smmu_master_free_smes(fwspec);
1581 arm_smmu_rpm_put(smmu);
1583 iommu_group_remove_device(dev);
1584 kfree(fwspec->iommu_priv);
1585 iommu_fwspec_free(dev);
1588 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1590 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1591 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1592 struct iommu_group *group = NULL;
1595 for_each_cfg_sme(fwspec, i, idx) {
1596 if (group && smmu->s2crs[idx].group &&
1597 group != smmu->s2crs[idx].group)
1598 return ERR_PTR(-EINVAL);
1600 group = smmu->s2crs[idx].group;
1603 if (group)
1604 return iommu_group_ref_get(group);
1606 if (dev_is_pci(dev))
1607 group = pci_device_group(dev);
1608 else if (dev_is_fsl_mc(dev))
1609 group = fsl_mc_device_group(dev);
1611 group = generic_device_group(dev);
1616 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1617 enum iommu_attr attr, void *data)
1619 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1621 switch (domain->type) {
1622 case IOMMU_DOMAIN_UNMANAGED:
1624 case DOMAIN_ATTR_NESTING:
1625 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1626 return 0;
1631 case IOMMU_DOMAIN_DMA:
1633 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1634 *(int *)data = smmu_domain->non_strict;
1635 return 0;
1645 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1646 enum iommu_attr attr, void *data)
1649 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1651 mutex_lock(&smmu_domain->init_mutex);
1653 switch (domain->type) {
1654 case IOMMU_DOMAIN_UNMANAGED:
1656 case DOMAIN_ATTR_NESTING:
1657 if (smmu_domain->smmu) {
1658 ret = -EPERM;
1659 goto out_unlock;
1660 }
1662 if (*(int *)data)
1663 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1664 else
1665 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1666 break;
1671 case IOMMU_DOMAIN_DMA:
1673 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1674 smmu_domain->non_strict = *(int *)data;
1675 break;
1683 out_unlock:
1684 mutex_unlock(&smmu_domain->init_mutex);
1685 return ret;
1688 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1690 u32 mask, fwid = 0;
1692 if (args->args_count > 0)
1693 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1695 if (args->args_count > 1)
1696 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1697 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1698 fwid |= FIELD_PREP(SMR_MASK, mask);
1700 return iommu_fwspec_add_ids(dev, &fwid, 1);
1703 static void arm_smmu_get_resv_regions(struct device *dev,
1704 struct list_head *head)
1706 struct iommu_resv_region *region;
1707 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1709 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1710 prot, IOMMU_RESV_SW_MSI);
1711 if (!region)
1712 return;
1714 list_add_tail(&region->list, head);
1716 iommu_dma_get_resv_regions(dev, head);
1719 static void arm_smmu_put_resv_regions(struct device *dev,
1720 struct list_head *head)
1722 struct iommu_resv_region *entry, *next;
1724 list_for_each_entry_safe(entry, next, head, list)
1725 kfree(entry);
1728 static struct iommu_ops arm_smmu_ops = {
1729 .capable = arm_smmu_capable,
1730 .domain_alloc = arm_smmu_domain_alloc,
1731 .domain_free = arm_smmu_domain_free,
1732 .attach_dev = arm_smmu_attach_dev,
1733 .map = arm_smmu_map,
1734 .unmap = arm_smmu_unmap,
1735 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1736 .iotlb_sync = arm_smmu_iotlb_sync,
1737 .iova_to_phys = arm_smmu_iova_to_phys,
1738 .add_device = arm_smmu_add_device,
1739 .remove_device = arm_smmu_remove_device,
1740 .device_group = arm_smmu_device_group,
1741 .domain_get_attr = arm_smmu_domain_get_attr,
1742 .domain_set_attr = arm_smmu_domain_set_attr,
1743 .of_xlate = arm_smmu_of_xlate,
1744 .get_resv_regions = arm_smmu_get_resv_regions,
1745 .put_resv_regions = arm_smmu_put_resv_regions,
1746 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1749 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1754 /* clear global FSR */
1755 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1756 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1759 * Reset stream mapping groups: Initial values mark all SMRn as
1760 * invalid and all S2CRn as bypass unless overridden.
1762 for (i = 0; i < smmu->num_mapping_groups; ++i)
1763 arm_smmu_write_sme(smmu, i);
1765 if (smmu->model == ARM_MMU500) {
1767 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1768 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1769 * bit is only present in MMU-500r2 onwards.
1771 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7);
1772 major = FIELD_GET(ID7_MAJOR, reg);
1773 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR);
1774 if (major >= 2)
1775 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1777 * Allow unmatched Stream IDs to allocate bypass
1778 * TLB entries for reduced latency.
1780 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1781 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg);
1784 /* Make sure all context banks are disabled and clear CB_FSR */
1785 for (i = 0; i < smmu->num_context_banks; ++i) {
1786 arm_smmu_write_context_bank(smmu, i);
1787 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
1789 * Disable MMU-500's not-particularly-beneficial next-page
1790 * prefetcher for the sake of errata #841119 and #826419.
1792 if (smmu->model == ARM_MMU500) {
1793 reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
1794 reg &= ~ARM_MMU500_ACTLR_CPRE;
1795 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
1799 /* Invalidate the TLB, just in case */
1800 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1801 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1803 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1805 /* Enable fault reporting */
1806 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1808 /* Disable TLB broadcasting. */
1809 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1811 /* Enable client access, handling unmatched streams as appropriate */
1812 reg &= ~sCR0_CLIENTPD;
1813 if (disable_bypass)
1814 reg |= sCR0_USFCFG;
1815 else
1816 reg &= ~sCR0_USFCFG;
1818 /* Disable forced broadcasting */
1819 reg &= ~sCR0_FB;
1821 /* Don't upgrade barriers */
1822 reg &= ~(sCR0_BSU);
1824 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1825 reg |= sCR0_VMID16EN;
1827 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1828 reg |= sCR0_EXIDENABLE;
1830 /* Push the button */
1831 arm_smmu_tlb_sync_global(smmu);
1832 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1835 static int arm_smmu_id_size_to_bits(int size)
1836 {
1837 switch (size) {
1838 case 0:
1839 return 32;
1840 case 1:
1841 return 36;
1842 case 2:
1843 return 40;
1844 case 3:
1845 return 42;
1846 case 4:
1847 return 44;
1848 default:
1849 return 48;
1850 }
1851 }
1854 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1858 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1861 dev_notice(smmu->dev, "probing hardware configuration...\n");
1862 dev_notice(smmu->dev, "SMMUv%d with:\n",
1863 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1866 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1868 /* Restrict available stages based on module parameter */
1869 if (force_stage == 1)
1870 id &= ~(ID0_S2TS | ID0_NTS);
1871 else if (force_stage == 2)
1872 id &= ~(ID0_S1TS | ID0_NTS);
1874 if (id & ID0_S1TS) {
1875 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1876 dev_notice(smmu->dev, "\tstage 1 translation\n");
1879 if (id & ID0_S2TS) {
1880 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1881 dev_notice(smmu->dev, "\tstage 2 translation\n");
1884 if (id & ID0_NTS) {
1885 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1886 dev_notice(smmu->dev, "\tnested translation\n");
1889 if (!(smmu->features &
1890 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1891 dev_err(smmu->dev, "\tno translation support!\n");
1895 if ((id & ID0_S1TS) &&
1896 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1897 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1898 dev_notice(smmu->dev, "\taddress translation ops\n");
1902 * In order for DMA API calls to work properly, we must defer to what
1903 * the FW says about coherency, regardless of what the hardware claims.
1904 * Fortunately, this also opens up a workaround for systems where the
1905 * ID register value has ended up configured incorrectly.
1907 cttw_reg = !!(id & ID0_CTTW);
1908 if (cttw_fw || cttw_reg)
1909 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1910 cttw_fw ? "" : "non-");
1911 if (cttw_fw != cttw_reg)
1912 dev_notice(smmu->dev,
1913 "\t(IDR0.CTTW overridden by FW configuration)\n");
1915 /* Max. number of entries we have for stream matching/indexing */
1916 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1917 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1918 size = 1 << 16;
1919 } else {
1920 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1921 }
1922 smmu->streamid_mask = size - 1;
1923 if (id & ID0_SMS) {
1924 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1925 size = FIELD_GET(ID0_NUMSMRG, id);
1928 "stream-matching supported, but no SMRs present!\n");
1932 /* Zero-initialised to mark as invalid */
1933 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1934 GFP_KERNEL);
1935 if (!smmu->smrs)
1936 return -ENOMEM;
1938 dev_notice(smmu->dev,
1939 "\tstream matching with %u register groups", size);
1941 /* s2cr->type == 0 means translation, so initialise explicitly */
1942 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1943 GFP_KERNEL);
1944 if (!smmu->s2crs)
1945 return -ENOMEM;
1946 for (i = 0; i < size; i++)
1947 smmu->s2crs[i] = s2cr_init_val;
1949 smmu->num_mapping_groups = size;
1950 mutex_init(&smmu->stream_map_mutex);
1951 spin_lock_init(&smmu->global_sync_lock);
1953 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1954 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1955 if (!(id & ID0_PTFS_NO_AARCH32S))
1956 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1960 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1961 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1963 /* Check for size mismatch of SMMU address space from mapped region */
1964 size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1965 if (smmu->numpage != 2 * size << smmu->pgshift)
1967 "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1968 2 * size << smmu->pgshift, smmu->numpage);
1969 /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1970 smmu->numpage = size;
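/*
 * Worked example of the check above (illustrative values): with
 * pgshift = 12 and ID1.NUMPAGENDXB = 5, size = 1 << 6 = 64, so the
 * register file (NUMPAGE global pages plus the same number of context
 * bank pages) should span 2 * 64 << 12 = 512KB. numpage is then
 * re-encoded as 64 so that ARM_SMMU_CB() indexes straight past the
 * global half of the region.
 */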
1972 smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1973 smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1974 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1975 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1978 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1979 smmu->num_context_banks, smmu->num_s2_context_banks);
1981 * Cavium CN88xx erratum #27704.
1982 * Ensure ASID and VMID allocation is unique across all SMMUs in
1985 if (smmu->model == CAVIUM_SMMUV2) {
1986 smmu->cavium_id_base =
1987 atomic_add_return(smmu->num_context_banks,
1988 &cavium_smmu_context_count);
1989 smmu->cavium_id_base -= smmu->num_context_banks;
1990 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1992 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1993 sizeof(*smmu->cbs), GFP_KERNEL);
1994 if (!smmu->cbs)
1995 return -ENOMEM;
1998 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1999 size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
2000 smmu->ipa_size = size;
2002 /* The output mask is also applied for bypass */
2003 size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
2004 smmu->pa_size = size;
2006 if (id & ID2_VMID16)
2007 smmu->features |= ARM_SMMU_FEAT_VMID16;
2010 * What the page table walker can address actually depends on which
2011 * descriptor format is in use, but since a) we don't know that yet,
2012 * and b) it can vary per context bank, this will have to do...
2014 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
2016 "failed to set DMA mask for table walker\n");
2018 if (smmu->version < ARM_SMMU_V2) {
2019 smmu->va_size = smmu->ipa_size;
2020 if (smmu->version == ARM_SMMU_V1_64K)
2021 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
2023 size = FIELD_GET(ID2_UBS, id);
2024 smmu->va_size = arm_smmu_id_size_to_bits(size);
2025 if (id & ID2_PTFS_4K)
2026 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
2027 if (id & ID2_PTFS_16K)
2028 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
2029 if (id & ID2_PTFS_64K)
2030 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
2033 /* Now we've corralled the various formats, what'll it do? */
2034 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
2035 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
2036 if (smmu->features &
2037 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
2038 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2039 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
2040 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2041 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
2042 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2044 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2045 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2047 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2048 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
2049 smmu->pgsize_bitmap);
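/*
 * Worked example (illustrative): an SMMU reporting AArch64 4K and 64K
 * granules ends up with SZ_4K | SZ_2M | SZ_1G | SZ_64K | SZ_512M
 * = 0x60211000, i.e. every block size those translation granules can
 * map with a single descriptor.
 */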
2052 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
2053 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
2054 smmu->va_size, smmu->ipa_size);
2056 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
2057 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
2058 smmu->ipa_size, smmu->pa_size);
2063 struct arm_smmu_match_data {
2064 enum arm_smmu_arch_version version;
2065 enum arm_smmu_implementation model;
2068 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
2069 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
2071 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
2072 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
2073 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
2074 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
2075 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
2076 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
2078 static const struct of_device_id arm_smmu_of_match[] = {
2079 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
2080 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
2081 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
2082 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
2083 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
2084 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
2085 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
2090 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
2091 {
2092 int ret = 0;
2094 switch (model) {
2095 case ACPI_IORT_SMMU_V1:
2096 case ACPI_IORT_SMMU_CORELINK_MMU400:
2097 smmu->version = ARM_SMMU_V1;
2098 smmu->model = GENERIC_SMMU;
2100 case ACPI_IORT_SMMU_CORELINK_MMU401:
2101 smmu->version = ARM_SMMU_V1_64K;
2102 smmu->model = GENERIC_SMMU;
2104 case ACPI_IORT_SMMU_V2:
2105 smmu->version = ARM_SMMU_V2;
2106 smmu->model = GENERIC_SMMU;
2108 case ACPI_IORT_SMMU_CORELINK_MMU500:
2109 smmu->version = ARM_SMMU_V2;
2110 smmu->model = ARM_MMU500;
2112 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
2113 smmu->version = ARM_SMMU_V2;
2114 smmu->model = CAVIUM_SMMUV2;
2115 break;
2116 default:
2117 ret = -ENODEV;
2118 }
2120 return ret;
2121 }
2123 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2124 struct arm_smmu_device *smmu)
2126 struct device *dev = smmu->dev;
2127 struct acpi_iort_node *node =
2128 *(struct acpi_iort_node **)dev_get_platdata(dev);
2129 struct acpi_iort_smmu *iort_smmu;
2132 /* Retrieve SMMU1/2 specific data */
2133 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2135 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2139 /* Ignore the configuration access interrupt */
2140 smmu->num_global_irqs = 1;
2142 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2143 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2148 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2149 struct arm_smmu_device *smmu)
2155 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2156 struct arm_smmu_device *smmu)
2158 const struct arm_smmu_match_data *data;
2159 struct device *dev = &pdev->dev;
2160 bool legacy_binding;
2162 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2163 &smmu->num_global_irqs)) {
2164 dev_err(dev, "missing #global-interrupts property\n");
2168 data = of_device_get_match_data(dev);
2169 smmu->version = data->version;
2170 smmu->model = data->model;
2172 parse_driver_options(smmu);
2174 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2175 if (legacy_binding && !using_generic_binding) {
2176 if (!using_legacy_binding)
2177 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2178 using_legacy_binding = true;
2179 } else if (!legacy_binding && !using_legacy_binding) {
2180 using_generic_binding = true;
2182 dev_err(dev, "not probing due to mismatched DT properties\n");
2186 if (of_dma_is_coherent(dev->of_node))
2187 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2189 return 0;
2190 }
2192 static void arm_smmu_bus_init(void)
2194 /* Oh, for a proper bus abstraction */
2195 if (!iommu_present(&platform_bus_type))
2196 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2197 #ifdef CONFIG_ARM_AMBA
2198 if (!iommu_present(&amba_bustype))
2199 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2200 #endif
2201 #ifdef CONFIG_PCI
2202 if (!iommu_present(&pci_bus_type)) {
2203 pci_request_acs();
2204 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2205 }
2206 #endif
2207 #ifdef CONFIG_FSL_MC_BUS
2208 if (!iommu_present(&fsl_mc_bus_type))
2209 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2210 #endif
2211 }
2213 static int arm_smmu_device_probe(struct platform_device *pdev)
2215 struct resource *res;
2216 resource_size_t ioaddr;
2217 struct arm_smmu_device *smmu;
2218 struct device *dev = &pdev->dev;
2219 int num_irqs, i, err;
2221 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2223 dev_err(dev, "failed to allocate arm_smmu_device\n");
2228 if (dev->of_node)
2229 err = arm_smmu_device_dt_probe(pdev, smmu);
2230 else
2231 err = arm_smmu_device_acpi_probe(pdev, smmu);
2232 if (err)
2233 return err;
2236 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2237 ioaddr = res->start;
2238 smmu->base = devm_ioremap_resource(dev, res);
2239 if (IS_ERR(smmu->base))
2240 return PTR_ERR(smmu->base);
2242 * The resource size should effectively match the value of SMMU_TOP;
2243 * stash that temporarily until we know PAGESIZE to validate it with.
2245 smmu->numpage = resource_size(res);
2247 num_irqs = 0;
2248 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2249 num_irqs++;
2250 if (num_irqs > smmu->num_global_irqs)
2251 smmu->num_context_irqs++;
2252 }
2254 if (!smmu->num_context_irqs) {
2255 dev_err(dev, "found %d interrupts but expected at least %d\n",
2256 num_irqs, smmu->num_global_irqs + 1);
2257 return -ENODEV;
2258 }
2260 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2261 GFP_KERNEL);
2262 if (!smmu->irqs) {
2263 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2264 return -ENOMEM;
2265 }
2267 for (i = 0; i < num_irqs; ++i) {
2268 int irq = platform_get_irq(pdev, i);
2270 if (irq < 0) {
2271 dev_err(dev, "failed to get irq index %d\n", i);
2272 return -ENODEV;
2273 }
2274 smmu->irqs[i] = irq;
2275 }
2277 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2278 if (err < 0) {
2279 dev_err(dev, "failed to get clocks %d\n", err);
2280 return err;
2281 }
2282 smmu->num_clks = err;
2284 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2285 if (err)
2286 return err;
2288 err = arm_smmu_device_cfg_probe(smmu);
2289 if (err)
2290 return err;
2292 if (smmu->version == ARM_SMMU_V2) {
2293 if (smmu->num_context_banks > smmu->num_context_irqs) {
2295 "found only %d context irq(s) but %d required\n",
2296 smmu->num_context_irqs, smmu->num_context_banks);
2300 /* Ignore superfluous interrupts */
2301 smmu->num_context_irqs = smmu->num_context_banks;
2304 for (i = 0; i < smmu->num_global_irqs; ++i) {
2305 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2306 arm_smmu_global_fault,
2307 IRQF_SHARED,
2308 "arm-smmu global fault",
2309 smmu);
2310 if (err) {
2311 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2312 i, smmu->irqs[i]);
2313 return err;
2314 }
2315 }
2317 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2318 "smmu.%pa", &ioaddr);
2320 dev_err(dev, "Failed to register iommu in sysfs\n");
2324 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2325 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2327 err = iommu_device_register(&smmu->iommu);
2329 dev_err(dev, "Failed to register iommu\n");
2333 platform_set_drvdata(pdev, smmu);
2334 arm_smmu_device_reset(smmu);
2335 arm_smmu_test_smr_masks(smmu);
2338 * We want to avoid touching dev->power.lock in fastpaths unless
2339 * it's really going to do something useful - pm_runtime_enabled()
2340 * can serve as an ideal proxy for that decision. So, conditionally
2341 * enable pm_runtime.
2343 if (dev->pm_domain) {
2344 pm_runtime_set_active(dev);
2345 pm_runtime_enable(dev);
2349 * For ACPI and generic DT bindings, an SMMU will be probed before
2350 * any device which might need it, so we want the bus ops in place
2351 * ready to handle default domain setup as soon as any SMMU exists.
2353 if (!using_legacy_binding)
2354 arm_smmu_bus_init();
2356 return 0;
2357 }
2360 * With the legacy DT binding in play, though, we have no guarantees about
2361 * probe order, but then we're also not doing default domains, so we can
2362 * delay setting bus ops until we're sure every possible SMMU is ready,
2363 * and that way ensure that no add_device() calls get missed.
2365 static int arm_smmu_legacy_bus_init(void)
2367 if (using_legacy_binding)
2368 arm_smmu_bus_init();
2369 return 0;
2370 }
2371 device_initcall_sync(arm_smmu_legacy_bus_init);
2373 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2375 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2377 if (!smmu)
2378 return;
2380 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2381 dev_err(&pdev->dev, "removing device with active domains!\n");
2383 arm_smmu_rpm_get(smmu);
2384 /* Turn the thing off */
2385 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
2386 arm_smmu_rpm_put(smmu);
2388 if (pm_runtime_enabled(smmu->dev))
2389 pm_runtime_force_suspend(smmu->dev);
2390 else
2391 clk_bulk_disable(smmu->num_clks, smmu->clks);
2393 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2396 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2398 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2401 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2402 if (ret)
2403 return ret;
2405 arm_smmu_device_reset(smmu);
2407 return 0;
2410 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2412 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2414 clk_bulk_disable(smmu->num_clks, smmu->clks);
2416 return 0;
2419 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2421 if (pm_runtime_suspended(dev))
2422 return 0;
2424 return arm_smmu_runtime_resume(dev);
2427 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2429 if (pm_runtime_suspended(dev))
2430 return 0;
2432 return arm_smmu_runtime_suspend(dev);
2435 static const struct dev_pm_ops arm_smmu_pm_ops = {
2436 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2437 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2438 arm_smmu_runtime_resume, NULL)
2441 static struct platform_driver arm_smmu_driver = {
2442 .driver = {
2443 .name = "arm-smmu",
2444 .of_match_table = of_match_ptr(arm_smmu_of_match),
2445 .pm = &arm_smmu_pm_ops,
2446 .suppress_bind_attrs = true,
2448 .probe = arm_smmu_device_probe,
2449 .shutdown = arm_smmu_device_shutdown,
2451 builtin_platform_driver(arm_smmu_driver);