1 // SPDX-License-Identifier: GPL-2.0-only
3 * IOMMU API for ARM architected SMMU implementations.
5 * Copyright (C) 2013 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver currently supports:
10 * - SMMUv1 and v2 implementations
11 * - Stream-matching and stream-indexing
12 * - v7/v8 long-descriptor format
13 * - Non-secure access to the SMMU
14 * - Context fault reporting
15 * - Extended Stream ID (16 bit)
18 #define pr_fmt(fmt) "arm-smmu: " fmt
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/of_iommu.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/ratelimit.h>
39 #include <linux/slab.h>
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
47 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
48 * global register space are still, in fact, using a hypervisor to mediate it
49 * by trapping and emulating register accesses. Sadly, some deployed versions
50 * of said trapping code have bugs wherein they go horribly wrong for stores
51 * using r31 (i.e. XZR/WZR) as the source register.
/* Value written for TLB syncs; avoids XZR/WZR stores (see Qcom note above). */
53 #define QCOM_DUMMY_VAL -1
/* TLB sync poll limits: outer exponential-backoff delay, inner busy-spin. */
55 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
56 #define TLB_SPIN_COUNT 10
/* Fixed IOVA window reserved for mapping MSI doorbells. */
58 #define MSI_IOVA_BASE 0x8000000
59 #define MSI_IOVA_LENGTH 0x100000
/* Module parameter: force stage-1 or stage-2 translation (0 = no forcing). */
61 static int force_stage;
62 module_param(force_stage, int, S_IRUGO);
63 MODULE_PARM_DESC(force_stage,
64 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
/* Module parameter: whether unattached streams fault instead of bypassing. */
65 static bool disable_bypass =
66 IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
67 module_param(disable_bypass, bool, S_IRUGO);
68 MODULE_PARM_DESC(disable_bypass,
69 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
/*
 * Software shadow of a Stream-to-Context Register (S2CR): routes a stream
 * map entry to a context bank, bypass, or fault.
 */
71 struct arm_smmu_s2cr {
72 struct iommu_group *group;
74 enum arm_smmu_s2cr_type type;
75 enum arm_smmu_s2cr_privcfg privcfg;
/* Reset value for an S2CR entry; honours the disable_bypass policy. */
79 #define s2cr_init_val (struct arm_smmu_s2cr){ \
80 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
93 struct arm_smmu_cfg *cfg;
/* Per-master state: owning SMMU plus its stream map entry indices. */
96 struct arm_smmu_master_cfg {
97 struct arm_smmu_device *smmu;
/* Sentinel for an unallocated stream map entry index. */
100 #define INVALID_SMENDX -1
/* Accessors for the arm_smmu_master_cfg hung off a device's iommu_fwspec. */
101 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
102 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
103 #define fwspec_smendx(fw, i) \
104 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
/* Iterate over a master's stream map entries: i is the ID index, idx the SME. */
105 #define for_each_cfg_sme(fw, i, idx) \
106 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
/* Which firmware binding style (legacy mmu-masters vs. generic) is in use. */
108 static bool using_legacy_binding, using_generic_binding;
/*
 * Take a runtime-PM reference on the SMMU so its registers are accessible.
 * No-op (returning success) when runtime PM is not enabled for the device.
 */
110 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
112 if (pm_runtime_enabled(smmu->dev))
113 return pm_runtime_get_sync(smmu->dev);
/* Drop the runtime-PM reference taken by arm_smmu_rpm_get(). */
118 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
120 if (pm_runtime_enabled(smmu->dev))
121 pm_runtime_put_autosuspend(smmu->dev);
/* Convert a generic iommu_domain to its containing arm_smmu_domain. */
124 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
126 return container_of(dom, struct arm_smmu_domain, domain);
/* Forward declarations; definitions live later in this file. */
129 static struct platform_driver arm_smmu_driver;
130 static struct iommu_ops arm_smmu_ops;
132 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
133 static int arm_smmu_bus_init(struct iommu_ops *ops);
/*
 * Find the DT node to match against "mmu-masters": for PCI devices walk up
 * to the root bus and use the host bridge's node, otherwise the device's own.
 */
135 static struct device_node *dev_get_dev_node(struct device *dev)
137 if (dev_is_pci(dev)) {
138 struct pci_bus *bus = to_pci_dev(dev)->bus;
140 while (!pci_is_root_bus(bus))
142 return of_node_get(bus->bridge->parent->of_node);
145 return of_node_get(dev->of_node);
/*
 * pci_for_each_dma_alias() callback: record the alias as a big-endian
 * Stream ID. Always returns 0 so the walk continues to the last alias.
 */
148 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
150 *((__be32 *)data) = cpu_to_be32(alias);
151 return 0; /* Continue walking */
/*
 * driver_for_each_device() callback: scan an SMMU's "mmu-masters" phandles
 * for the target node (passed in via the iterator). On a match, *data is
 * overwritten with the matching SMMU device pointer.
 */
154 static int __find_legacy_master_phandle(struct device *dev, void *data)
156 struct of_phandle_iterator *it = *(void **)data;
157 struct device_node *np = it->node;
160 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
161 "#stream-id-cells", -1)
162 if (it->node == np) {
163 *(void **)data = dev;
/* -ENOENT just means the list ended without a match: not an error. */
167 return err == -ENOENT ? 0 : err;
/*
 * Legacy "mmu-masters" DT binding support: locate the SMMU that lists this
 * device as a master, initialise the device's fwspec against it, and add
 * the Stream IDs found in the phandle arguments (or, for PCI, derived from
 * the Requester ID). On success *smmu points at the owning SMMU.
 */
170 static int arm_smmu_register_legacy_master(struct device *dev,
171 struct arm_smmu_device **smmu)
173 struct device *smmu_dev;
174 struct device_node *np;
175 struct of_phandle_iterator it;
181 np = dev_get_dev_node(dev);
182 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
188 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
189 __find_legacy_master_phandle);
197 if (dev_is_pci(dev)) {
198 /* "mmu-masters" assumes Stream ID == Requester ID */
199 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
205 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
210 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
214 *smmu = dev_get_drvdata(smmu_dev);
215 of_phandle_iterator_args(&it, sids, it.cur_count);
216 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
222 * With the legacy DT binding in play, we have no guarantees about
223 * probe order, but then we're also not doing default domains, so we can
224 * delay setting bus ops until we're sure every possible SMMU is ready,
225 * and that way ensure that no add_device() calls get missed.
/* Late initcall: install bus ops only once all SMMUs have probed. */
227 static int arm_smmu_legacy_bus_init(void)
229 if (using_legacy_binding)
230 return arm_smmu_bus_init(&arm_smmu_ops);
233 device_initcall_sync(arm_smmu_legacy_bus_init);
/* Stub when legacy DT binding support is compiled out. */
235 static int arm_smmu_register_legacy_master(struct device *dev,
236 struct arm_smmu_device **smmu)
240 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
/*
 * Atomically claim a free bit in [start, end) of the given bitmap
 * (used for context bank allocation); loops until test_and_set_bit wins.
 */
242 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
247 idx = find_next_zero_bit(map, end, start);
250 } while (test_and_set_bit(idx, map));
/* Release a bit previously claimed by __arm_smmu_alloc_bitmap(). */
255 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
260 /* Wait for any pending TLB invalidations to complete */
/*
 * Issue a TLB sync via the given sync register and poll the status register
 * until GSACTIVE clears: a short busy-spin inside an exponentially growing
 * delay loop, bounded by TLB_LOOP_TIMEOUT. Implementations may override
 * this entirely via impl->tlb_sync.
 */
261 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
262 int sync, int status)
264 unsigned int spin_cnt, delay;
267 if (smmu->impl && unlikely(smmu->impl->tlb_sync))
268 return smmu->impl->tlb_sync(smmu, page, sync, status);
/* QCOM_DUMMY_VAL rather than 0: see the Qcom erratum note near the top. */
270 arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
271 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
272 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
273 reg = arm_smmu_readl(smmu, page, status);
274 if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
280 dev_err_ratelimited(smmu->dev,
281 "TLB sync timed out -- SMMU may be deadlocked\n");
/* Global TLB sync via GR0, serialised by the global sync lock. */
284 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
288 spin_lock_irqsave(&smmu->global_sync_lock, flags);
289 __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
290 ARM_SMMU_GR0_sTLBGSTATUS);
291 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
/* Per-context-bank TLB sync, serialised by the domain's cb_lock. */
294 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
296 struct arm_smmu_device *smmu = smmu_domain->smmu;
299 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
300 __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
301 ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
302 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
/* Stage-1: invalidate all TLB entries for the domain's ASID, then sync. */
305 static void arm_smmu_tlb_inv_context_s1(void *cookie)
307 struct arm_smmu_domain *smmu_domain = cookie;
309 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
310 * current CPU are visible beforehand.
313 arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
314 ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
315 arm_smmu_tlb_sync_context(smmu_domain);
/* Stage-2: invalidate by VMID via GR0, then do a global sync. */
318 static void arm_smmu_tlb_inv_context_s2(void *cookie)
320 struct arm_smmu_domain *smmu_domain = cookie;
321 struct arm_smmu_device *smmu = smmu_domain->smmu;
325 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
326 arm_smmu_tlb_sync_global(smmu);
/*
 * Stage-1 invalidate-by-VA over [iova, iova + size) in granule steps,
 * using the given TLBI register. AArch32 formats encode the ASID in the
 * low bits of the written value; AArch64 carries it in bits [63:48].
 */
329 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
330 size_t granule, void *cookie, int reg)
332 struct arm_smmu_domain *smmu_domain = cookie;
333 struct arm_smmu_device *smmu = smmu_domain->smmu;
334 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
335 int idx = cfg->cbndx;
337 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
340 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
/* Page-align the address; ASID occupies the low 12 bits of the value. */
341 iova = (iova >> 12) << 12;
344 arm_smmu_cb_write(smmu, idx, reg, iova);
346 } while (size -= granule);
349 iova |= (u64)cfg->asid << 48;
351 arm_smmu_cb_writeq(smmu, idx, reg, iova);
/* AArch64 TLBI VA operands are in units of 4K pages, hence >> 12. */
352 iova += granule >> 12;
353 } while (size -= granule);
/*
 * Stage-2 invalidate-by-IPA over [iova, iova + size); 64-bit writes for
 * the AArch64 context format, 32-bit otherwise.
 */
357 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
358 size_t granule, void *cookie, int reg)
360 struct arm_smmu_domain *smmu_domain = cookie;
361 struct arm_smmu_device *smmu = smmu_domain->smmu;
362 int idx = smmu_domain->cfg.cbndx;
364 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
369 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
370 arm_smmu_cb_writeq(smmu, idx, reg, iova);
372 arm_smmu_cb_write(smmu, idx, reg, iova);
373 iova += granule >> 12;
374 } while (size -= granule);
/* io-pgtable tlb_flush_walk hook, stage-1: range invalidate + sync. */
377 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
378 size_t granule, void *cookie)
380 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
381 ARM_SMMU_CB_S1_TLBIVA);
382 arm_smmu_tlb_sync_context(cookie);
/* io-pgtable tlb_flush_leaf hook, stage-1: last-level-only invalidate. */
385 static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
386 size_t granule, void *cookie)
388 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
389 ARM_SMMU_CB_S1_TLBIVAL);
390 arm_smmu_tlb_sync_context(cookie);
/* io-pgtable tlb_add_page hook, stage-1: queue invalidate, sync deferred. */
393 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
394 unsigned long iova, size_t granule,
397 arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
398 ARM_SMMU_CB_S1_TLBIVAL);
/* Stage-2 equivalents of the three hooks above, using IPA-based TLBIs. */
401 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
402 size_t granule, void *cookie)
404 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
405 ARM_SMMU_CB_S2_TLBIIPAS2);
406 arm_smmu_tlb_sync_context(cookie);
409 static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
410 size_t granule, void *cookie)
412 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
413 ARM_SMMU_CB_S2_TLBIIPAS2L);
414 arm_smmu_tlb_sync_context(cookie);
417 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
418 unsigned long iova, size_t granule,
421 arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
422 ARM_SMMU_CB_S2_TLBIIPAS2L);
/* SMMUv1 stage-2 has no by-range op: fall back to whole-context invalidate. */
425 static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
426 size_t granule, void *cookie)
428 arm_smmu_tlb_inv_context_s2(cookie);
431 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
432 * almost negligible, but the benefit of getting the first one in as far ahead
433 * of the sync as possible is significant, hence we don't just make this a
434 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
437 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
438 unsigned long iova, size_t granule,
441 struct arm_smmu_domain *smmu_domain = cookie;
442 struct arm_smmu_device *smmu = smmu_domain->smmu;
444 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
447 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
/* Flush ops for stage-1 domains. */
450 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
451 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
452 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
453 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
454 .tlb_add_page = arm_smmu_tlb_add_page_s1,
/* Flush ops for stage-2 domains on SMMUv2 (by-IPA invalidation available). */
457 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
458 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
459 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
460 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
461 .tlb_add_page = arm_smmu_tlb_add_page_s2,
/* Flush ops for stage-2 domains on SMMUv1 (whole-VMID invalidation only). */
464 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
465 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
466 .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
467 .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
468 .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
/*
 * Context-bank fault IRQ handler: reads FSR/FSYNR0/FAR/CBFRSYNRA, logs an
 * unhandled fault (rate-limited) and acks it by writing FSR back. Spurious
 * interrupts (no fault bit set) are reported as not-handled.
 */
471 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
473 u32 fsr, fsynr, cbfrsynra;
475 struct iommu_domain *domain = dev;
476 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
477 struct arm_smmu_device *smmu = smmu_domain->smmu;
478 int idx = smmu_domain->cfg.cbndx;
480 fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
481 if (!(fsr & ARM_SMMU_FSR_FAULT))
484 fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
485 iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
486 cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
488 dev_err_ratelimited(smmu->dev,
489 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
490 fsr, iova, fsynr, cbfrsynra, idx);
/* Write-to-clear acknowledges the fault. */
492 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
/*
 * Global fault IRQ handler: dumps the sGFSR/sGFSYNRn syndrome registers
 * (rate-limited), with a friendlier hint when the fault is an Unidentified
 * Stream Fault caused by the disable-bypass-by-default policy, then acks
 * by writing sGFSR back.
 */
496 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
498 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
499 struct arm_smmu_device *smmu = dev;
500 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
501 DEFAULT_RATELIMIT_BURST);
503 gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
504 gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
505 gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
506 gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
511 if (__ratelimit(&rs)) {
512 if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
513 (gfsr & ARM_SMMU_sGFSR_USF))
515 "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
519 "Unexpected global fault, this could be serious\n");
521 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
522 gfsr, gfsynr0, gfsynr1, gfsynr2);
/* Write-to-clear acknowledges the fault. */
525 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
/*
 * Populate the software shadow (struct arm_smmu_cb) of a context bank from
 * the io-pgtable configuration: TCR(s), TTBR(s) and MAIR/PRRR+NMRR values
 * appropriate to the chosen format (AArch32-S short descriptor, LPAE
 * stage-1, or stage-2). The hardware is programmed later by
 * arm_smmu_write_context_bank().
 */
529 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
530 struct io_pgtable_cfg *pgtbl_cfg)
532 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
533 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
534 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
540 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
541 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
543 cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
544 cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
545 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
546 cb->tcr[1] |= ARM_SMMU_TCR2_AS;
548 cb->tcr[0] |= ARM_SMMU_TCR_EAE;
551 cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
556 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
557 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
/* LPAE stage-1: fold the ASID into both TTBR shadow values. */
560 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
561 cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID,
563 cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
567 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
570 /* MAIRs (stage-1 only) */
572 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
573 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
574 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
576 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
577 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
/*
 * Program context bank @idx from its software shadow: CBA2R/CBAR routing
 * and VMID, then TCR(s), TTBR(s), MAIR(s), and finally SCTLR to enable
 * translation. An unassigned bank (cb->cfg == NULL) is simply disabled.
 */
582 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
586 struct arm_smmu_cb *cb = &smmu->cbs[idx];
587 struct arm_smmu_cfg *cfg = cb->cfg;
589 /* Unassigned context banks only need disabling */
591 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
595 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
598 if (smmu->version > ARM_SMMU_V1) {
599 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
600 reg = ARM_SMMU_CBA2R_VA64;
603 /* 16-bit VMIDs live in CBA2R */
604 if (smmu->features & ARM_SMMU_FEAT_VMID16)
605 reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
607 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
611 reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
612 if (smmu->version < ARM_SMMU_V2)
613 reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
616 * Use the weakest shareability/memory types, so they are
617 * overridden by the ttbcr/pte.
620 reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
621 ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
622 FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
623 ARM_SMMU_CBAR_S1_MEMATTR_WB);
624 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
625 /* 8-bit VMIDs live in CBAR */
626 reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
628 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
632 * We must write this before the TTBRs, since it determines the
633 * access behaviour of some fields (in particular, ASID[15:8]).
635 if (stage1 && smmu->version > ARM_SMMU_V1)
636 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
637 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
/* AArch32-S uses 32-bit TTBRs with the ASID in CONTEXTIDR instead. */
640 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
641 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
642 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
643 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
645 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
647 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
651 /* MAIRs (stage-1 only) */
653 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
654 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
/* SCTLR last: sets the M bit, enabling translation for this bank. */
658 reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
659 ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
661 reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
662 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
663 reg |= ARM_SMMU_SCTLR_E;
665 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
/*
 * Finalise a domain against a particular SMMU instance (called on first
 * attach, under init_mutex): pick a translation stage and context format,
 * allocate a context bank and IRQ index, build the io-pgtable, programme
 * the context bank, and request the context-fault IRQ. Idempotent once
 * smmu_domain->smmu is set; identity domains short-circuit to bypass.
 */
668 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
669 struct arm_smmu_device *smmu)
671 int irq, start, ret = 0;
672 unsigned long ias, oas;
673 struct io_pgtable_ops *pgtbl_ops;
674 struct io_pgtable_cfg pgtbl_cfg;
675 enum io_pgtable_fmt fmt;
676 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
677 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
679 mutex_lock(&smmu_domain->init_mutex);
680 if (smmu_domain->smmu)
683 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
684 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
685 smmu_domain->smmu = smmu;
690 * Mapping the requested stage onto what we support is surprisingly
691 * complicated, mainly because the spec allows S1+S2 SMMUs without
692 * support for nested translation. That means we end up with the
695 * Requested Supported Actual
705 * Note that you can't actually request stage-2 mappings.
707 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
708 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
709 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
710 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
713 * Choosing a suitable context format is even more fiddly. Until we
714 * grow some way for the caller to express a preference, and/or move
715 * the decision into the io-pgtable code where it arguably belongs,
716 * just aim for the closest thing to the rest of the system, and hope
717 * that the hardware isn't esoteric enough that we can't assume AArch64
718 * support to be a superset of AArch32 support...
720 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
721 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
722 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
723 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
724 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
725 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
726 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
727 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
728 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
729 ARM_SMMU_FEAT_FMT_AARCH64_16K |
730 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
731 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
733 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
/* Derive CBAR type, bank search range, IAS/OAS and pgtable format. */
738 switch (smmu_domain->stage) {
739 case ARM_SMMU_DOMAIN_S1:
740 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
/* Stage-1 banks live after the stage-2-capable ones. */
741 start = smmu->num_s2_context_banks;
743 oas = smmu->ipa_size;
744 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
745 fmt = ARM_64_LPAE_S1;
746 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
747 fmt = ARM_32_LPAE_S1;
748 ias = min(ias, 32UL);
749 oas = min(oas, 40UL);
752 ias = min(ias, 32UL);
753 oas = min(oas, 32UL);
755 smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
757 case ARM_SMMU_DOMAIN_NESTED:
759 * We will likely want to change this if/when KVM gets
762 case ARM_SMMU_DOMAIN_S2:
763 cfg->cbar = CBAR_TYPE_S2_TRANS;
765 ias = smmu->ipa_size;
767 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
768 fmt = ARM_64_LPAE_S2;
770 fmt = ARM_32_LPAE_S2;
771 ias = min(ias, 40UL);
772 oas = min(oas, 40UL);
774 if (smmu->version == ARM_SMMU_V2)
775 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
777 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
783 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
784 smmu->num_context_banks);
/* SMMUv1 shares context IRQs round-robin; v2 has one per bank. */
789 if (smmu->version < ARM_SMMU_V2) {
790 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
791 cfg->irptndx %= smmu->num_context_irqs;
793 cfg->irptndx = cfg->cbndx;
/* VMID 0 is reserved, hence the +1. */
796 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
797 cfg->vmid = cfg->cbndx + 1;
799 cfg->asid = cfg->cbndx;
801 smmu_domain->smmu = smmu;
802 if (smmu->impl && smmu->impl->init_context) {
803 ret = smmu->impl->init_context(smmu_domain);
808 pgtbl_cfg = (struct io_pgtable_cfg) {
809 .pgsize_bitmap = smmu->pgsize_bitmap,
812 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
813 .tlb = smmu_domain->flush_ops,
814 .iommu_dev = smmu->dev,
817 if (smmu_domain->non_strict)
818 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
820 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
826 /* Update the domain's page sizes to reflect the page table format */
827 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
828 domain->geometry.aperture_end = (1UL << ias) - 1;
829 domain->geometry.force_aperture = true;
831 /* Initialise the context bank with our page table cfg */
832 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
833 arm_smmu_write_context_bank(smmu, cfg->cbndx);
836 * Request context fault interrupt. Do this last to avoid the
837 * handler seeing a half-initialised domain state.
839 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
840 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
841 IRQF_SHARED, "arm-smmu-context-fault", domain);
843 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
/* IRQ request failure is non-fatal; mark the index invalid and carry on. */
845 cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
848 mutex_unlock(&smmu_domain->init_mutex);
850 /* Publish page table ops for map/unmap */
851 smmu_domain->pgtbl_ops = pgtbl_ops;
/* Error unwind: release the context bank and de-associate the SMMU. */
855 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
856 smmu_domain->smmu = NULL;
858 mutex_unlock(&smmu_domain->init_mutex);
/*
 * Tear down a finalised domain: disable its context bank, free the
 * context-fault IRQ, the io-pgtable and the bank index. No-op for
 * identity domains or domains never attached to an SMMU.
 */
862 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
864 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
865 struct arm_smmu_device *smmu = smmu_domain->smmu;
866 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
869 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
872 ret = arm_smmu_rpm_get(smmu);
877 * Disable the context bank and free the page tables before freeing
/* NULLing cb->cfg makes arm_smmu_write_context_bank() disable the bank. */
880 smmu->cbs[cfg->cbndx].cfg = NULL;
881 arm_smmu_write_context_bank(smmu, cfg->cbndx);
883 if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
884 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
885 devm_free_irq(smmu->dev, irq, domain);
888 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
889 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
891 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops .domain_alloc: supports unmanaged, DMA and identity domains.
 * DMA domains get an iova cookie (unless the legacy binding is in use);
 * the context bank itself is only allocated at first attach.
 */
894 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
896 struct arm_smmu_domain *smmu_domain;
898 if (type != IOMMU_DOMAIN_UNMANAGED &&
899 type != IOMMU_DOMAIN_DMA &&
900 type != IOMMU_DOMAIN_IDENTITY)
903 * Allocate the domain and initialise some of its data structures.
904 * We can't really do anything meaningful until we've added a
907 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
911 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
912 iommu_get_dma_cookie(&smmu_domain->domain))) {
917 mutex_init(&smmu_domain->init_mutex);
918 spin_lock_init(&smmu_domain->cb_lock);
920 return &smmu_domain->domain;
/* iommu_ops .domain_free: release cookie, context and the domain itself. */
923 static void arm_smmu_domain_free(struct iommu_domain *domain)
925 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
928 * Free the domain resources. We assume that all devices have
929 * already been detached.
931 iommu_put_dma_cookie(domain);
932 arm_smmu_destroy_domain_context(domain);
/* Program SMR @idx from its shadow; VALID lives in S2CR when EXIDS is on. */
936 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
938 struct arm_smmu_smr *smr = smmu->smrs + idx;
939 u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
940 FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
942 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
943 reg |= ARM_SMMU_SMR_VALID;
944 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
/* Program S2CR @idx from its shadow, adding EXIDVALID when applicable. */
947 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
949 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
950 u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
951 FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
952 FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
954 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
955 smmu->smrs[idx].valid)
956 reg |= ARM_SMMU_S2CR_EXIDVALID;
957 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
/* Program a full stream map entry: S2CR first, then (if present) the SMR. */
960 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
962 arm_smmu_write_s2cr(smmu, idx);
964 arm_smmu_write_smr(smmu, idx);
968 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
969 * should be called after sCR0 is written.
/*
 * Probe how many SMR.ID and SMR.MASK bits are actually implemented by
 * writing all-ones patterns to a free SMR and reading back, narrowing
 * smmu->streamid_mask and smmu->smr_mask_mask accordingly.
 */
971 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
979 * If we've had to accommodate firmware memory regions, we may
980 * have live SMRs by now; tread carefully...
982 * Somewhat perversely, not having a free SMR for this test implies we
983 * can get away without it anyway, as we'll only be able to 'allocate'
984 * these SMRs for the ID/mask values we're already trusting to be OK.
986 for (i = 0; i < smmu->num_mapping_groups; i++)
987 if (!smmu->smrs[i].valid)
992 * SMR.ID bits may not be preserved if the corresponding MASK
993 * bits are set, so check each one separately. We can reject
994 * masters later if they try to claim IDs outside these masks.
996 smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
997 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
998 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
999 smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
1001 smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
1002 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
1003 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
1004 smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
/*
 * Find a stream map entry for the given id/mask: return an index whose
 * existing SMR fully covers the request, a free index if none matches,
 * -ENOSPC if none is free, or -EINVAL on a partial (conflicting) overlap.
 */
1007 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1009 struct arm_smmu_smr *smrs = smmu->smrs;
1010 int i, free_idx = -ENOSPC;
1012 /* Stream indexing is blissfully easy */
1016 /* Validating SMRs is... less so */
1017 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1018 if (!smrs[i].valid) {
1020 * Note the first free entry we come across, which
1021 * we'll claim in the end if nothing else matches.
1028 * If the new entry is _entirely_ matched by an existing entry,
1029 * then reuse that, with the guarantee that there also cannot
1030 * be any subsequent conflicting entries. In normal use we'd
1031 * expect simply identical entries for this case, but there's
1032 * no harm in accommodating the generalisation.
1034 if ((mask & smrs[i].mask) == mask &&
1035 !((id ^ smrs[i].id) & ~smrs[i].mask))
1038 * If the new entry has any other overlap with an existing one,
1039 * though, then there always exists at least one stream ID
1040 * which would cause a conflict, and we can't allow that risk.
1042 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
/*
 * Drop one reference on stream map entry @idx; when it hits zero, reset
 * the S2CR to its init value and invalidate the SMR. Returns whether the
 * hardware entry needs rewriting (i.e. the count reached zero).
 */
1049 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1051 if (--smmu->s2crs[idx].count)
1054 smmu->s2crs[idx] = s2cr_init_val;
1056 smmu->smrs[idx].valid = false;
/*
 * Allocate (or share) stream map entries for every Stream ID in a device's
 * fwspec, join the device to its IOMMU group, then program the hardware
 * SMRs/S2CRs. All under stream_map_mutex; rolls back claimed entries on
 * failure.
 */
1061 static int arm_smmu_master_alloc_smes(struct device *dev)
1063 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1064 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1065 struct arm_smmu_device *smmu = cfg->smmu;
1066 struct arm_smmu_smr *smrs = smmu->smrs;
1067 struct iommu_group *group;
1070 mutex_lock(&smmu->stream_map_mutex);
1071 /* Figure out a viable stream map entry allocation */
1072 for_each_cfg_sme(fwspec, i, idx) {
1073 u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1074 u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
/* Already-valid index means this master was allocated twice. */
1076 if (idx != INVALID_SMENDX) {
1081 ret = arm_smmu_find_sme(smmu, sid, mask);
/* Only initialise the SMR shadow for a freshly-claimed entry. */
1086 if (smrs && smmu->s2crs[idx].count == 0) {
1088 smrs[idx].mask = mask;
1089 smrs[idx].valid = true;
1091 smmu->s2crs[idx].count++;
1092 cfg->smendx[i] = (s16)idx;
1095 group = iommu_group_get_for_dev(dev);
1096 if (IS_ERR(group)) {
1097 ret = PTR_ERR(group);
/* s2crs[].group below keeps a reference-free pointer; drop ours. */
1100 iommu_group_put(group);
1102 /* It worked! Now, poke the actual hardware */
1103 for_each_cfg_sme(fwspec, i, idx) {
1104 arm_smmu_write_sme(smmu, idx);
1105 smmu->s2crs[idx].group = group;
1108 mutex_unlock(&smmu->stream_map_mutex);
/* Error unwind: release every entry claimed so far. */
1113 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1114 cfg->smendx[i] = INVALID_SMENDX;
1116 mutex_unlock(&smmu->stream_map_mutex);
/* Release all of a master's stream map entries, rewriting freed ones. */
1120 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1122 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1123 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1126 mutex_lock(&smmu->stream_map_mutex);
1127 for_each_cfg_sme(fwspec, i, idx) {
1128 if (arm_smmu_free_sme(smmu, idx))
1129 arm_smmu_write_sme(smmu, idx);
1130 cfg->smendx[i] = INVALID_SMENDX;
1132 mutex_unlock(&smmu->stream_map_mutex);
/*
 * Point a master's S2CRs at the domain's context bank (or bypass for an
 * identity domain), rewriting only entries whose type/cbndx would change.
 */
1135 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1136 struct iommu_fwspec *fwspec)
1138 struct arm_smmu_device *smmu = smmu_domain->smmu;
1139 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1140 u8 cbndx = smmu_domain->cfg.cbndx;
1141 enum arm_smmu_s2cr_type type;
1144 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1145 type = S2CR_TYPE_BYPASS;
1147 type = S2CR_TYPE_TRANS;
1149 for_each_cfg_sme(fwspec, i, idx) {
/* Skip entries already routed the way we want. */
1150 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1153 s2cr[idx].type = type;
1154 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1155 s2cr[idx].cbndx = cbndx;
1156 arm_smmu_write_s2cr(smmu, idx);
/*
 * iommu_ops .attach_dev: validate the device's fwspec, finalise the domain
 * on this SMMU if needed, check the domain isn't tied to a different SMMU,
 * then route the master's streams to the domain's context bank. Holds a
 * runtime-PM reference around the register accesses and configures
 * autosuspend afterwards.
 */
1161 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1164 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1165 struct arm_smmu_device *smmu;
1166 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1168 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1169 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1174 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1175 * domains between of_xlate() and add_device() - we have no way to cope
1176 * with that, so until ARM gets converted to rely on groups and default
1177 * domains, just say no (but more politely than by dereferencing NULL).
1178 * This should be at least a WARN_ON once that's sorted.
1180 if (!fwspec->iommu_priv)
1183 smmu = fwspec_smmu(fwspec);
1185 ret = arm_smmu_rpm_get(smmu);
1189 /* Ensure that the domain is finalised */
1190 ret = arm_smmu_init_domain_context(domain, smmu);
1195 * Sanity check the domain. We don't support domains across
1198 if (smmu_domain->smmu != smmu) {
1200 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1201 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1206 /* Looks ok, so add the device to the domain */
1207 ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1210 * Setup an autosuspend delay to avoid bouncing runpm state.
1211 * Otherwise, if a driver for a suspended consumer device
1212 * unmaps buffers, it will runpm resume/suspend for each one.
1214 * For example, when used by a GPU device, when an application
1215 * or game exits, it can trigger unmapping 100s or 1000s of
1216 * buffers. With a runpm cycle for each buffer, that adds up
1217 * to 5-10sec worth of reprogramming the context bank, while
1218 * the system appears to be locked up to the user.
1220 pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1221 pm_runtime_use_autosuspend(smmu->dev);
1224 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops .map callback: create a mapping of @size bytes at @iova.
 * Delegates to the io-pgtable code while holding a runtime-PM reference,
 * since page-table updates may require the SMMU clocks to be on.
 */
1228 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1229 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1231 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1232 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1238 arm_smmu_rpm_get(smmu);
1239 ret = ops->map(ops, iova, paddr, size, prot);
1240 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops .unmap callback: tear down up to @size bytes of mapping at @iova.
 * Returns the number of bytes actually unmapped (as reported by io-pgtable).
 * TLB invalidation is deferred to the @gather / iotlb_sync machinery.
 */
1245 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1246 size_t size, struct iommu_iotlb_gather *gather)
1248 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1249 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1255 arm_smmu_rpm_get(smmu);
1256 ret = ops->unmap(ops, iova, size, gather);
1257 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops .flush_iotlb_all callback: invalidate every TLB entry for the
 * domain. flush_ops may be unset (e.g. before the domain is finalised or for
 * bypass domains — presumably; confirm against init_domain_context), in which
 * case there is nothing to do.
 */
1262 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1264 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1265 struct arm_smmu_device *smmu = smmu_domain->smmu;
1267 if (smmu_domain->flush_ops) {
1268 arm_smmu_rpm_get(smmu);
1269 smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1270 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops .iotlb_sync callback: wait for queued TLB invalidations to
 * complete. SMMUv2 (and any stage-1 context) can sync at context-bank
 * granularity; otherwise fall back to the global sync register.
 */
1274 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1275 struct iommu_iotlb_gather *gather)
1277 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1278 struct arm_smmu_device *smmu = smmu_domain->smmu;
1283 arm_smmu_rpm_get(smmu);
1284 if (smmu->version == ARM_SMMU_V2 ||
1285 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1286 arm_smmu_tlb_sync_context(smmu_domain);
1288 arm_smmu_tlb_sync_global(smmu);
1289 arm_smmu_rpm_put(smmu);
1292 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1295 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1296 struct arm_smmu_device *smmu = smmu_domain->smmu;
1297 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1298 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1299 struct device *dev = smmu->dev;
1303 unsigned long va, flags;
1304 int ret, idx = cfg->cbndx;
1306 ret = arm_smmu_rpm_get(smmu);
1310 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1311 va = iova & ~0xfffUL;
1312 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1313 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1315 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1317 reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1318 if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1320 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1322 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1324 return ops->iova_to_phys(ops, iova);
1327 phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1328 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1329 if (phys & ARM_SMMU_CB_PAR_F) {
1330 dev_err(dev, "translation fault!\n");
1331 dev_err(dev, "PAR = 0x%llx\n", phys);
1335 arm_smmu_rpm_put(smmu);
1337 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
/*
 * iommu_ops .iova_to_phys callback. Identity domains take a short-cut
 * (presumably returning @iova unchanged — branch body not shown here; TODO
 * confirm). Stage-1 domains on hardware with translation ops use the ATS
 * register path; everything else walks the software page tables.
 */
1340 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1343 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1344 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1346 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1352 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1353 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1354 return arm_smmu_iova_to_phys_hard(domain, iova);
1356 return ops->iova_to_phys(ops, iova);
/*
 * iommu_ops .capable callback: report which generic IOMMU capabilities this
 * driver supports (switch body partially elided in this listing).
 */
1359 static bool arm_smmu_capable(enum iommu_cap cap)
1362 case IOMMU_CAP_CACHE_COHERENCY:
1364 * Return true here as the SMMU can always send out coherent
1368 case IOMMU_CAP_NOEXEC:
/*
 * Look up the arm_smmu_device instance whose platform device matches
 * @fwnode, or NULL if no such SMMU has been probed by this driver yet.
 */
1376 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1378 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1381 return dev ? dev_get_drvdata(dev) : NULL;
/*
 * iommu_ops .add_device callback: validate the device's stream IDs against
 * this SMMU's capabilities, allocate the per-master config, and claim
 * stream-mapping entries. Returns 0 on success or a negative errno.
 */
1384 static int arm_smmu_add_device(struct device *dev)
1386 struct arm_smmu_device *smmu;
1387 struct arm_smmu_master_cfg *cfg;
1388 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
/* Legacy "mmu-masters" DT binding: derive the fwspec ourselves. */
1391 if (using_legacy_binding) {
1392 ret = arm_smmu_register_legacy_master(dev, &smmu);
1395 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1396 * will allocate/initialise a new one. Thus we need to update fwspec for
1399 fwspec = dev_iommu_fwspec_get(dev);
1402 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1403 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
/* Reject IDs/masks wider than what the hardware can match. */
1409 for (i = 0; i < fwspec->num_ids; i++) {
1410 u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1411 u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1413 if (sid & ~smmu->streamid_mask) {
1414 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1415 sid, smmu->streamid_mask);
1418 if (mask & ~smmu->smr_mask_mask) {
1419 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1420 mask, smmu->smr_mask_mask);
/* Trailing flexible smendx[] array sized by the number of stream IDs. */
1426 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1432 fwspec->iommu_priv = cfg;
1434 cfg->smendx[i] = INVALID_SMENDX;
1436 ret = arm_smmu_rpm_get(smmu);
1440 ret = arm_smmu_master_alloc_smes(dev);
1441 arm_smmu_rpm_put(smmu);
1446 iommu_device_link(&smmu->iommu, dev);
/* Tie consumer runtime PM to the SMMU so it powers up first. */
1448 device_link_add(dev, smmu->dev,
1449 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
/* Error path (label context elided in this listing). */
1456 iommu_fwspec_free(dev);
/*
 * iommu_ops .remove_device callback: undo arm_smmu_add_device() — release
 * the stream-mapping entries, sysfs link, group membership, per-master
 * config and fwspec.
 */
1460 static void arm_smmu_remove_device(struct device *dev)
1462 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1463 struct arm_smmu_master_cfg *cfg;
1464 struct arm_smmu_device *smmu;
/* Nothing to do for devices that were never ours. */
1467 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1470 cfg = fwspec->iommu_priv;
1473 ret = arm_smmu_rpm_get(smmu);
1477 iommu_device_unlink(&smmu->iommu, dev);
1478 arm_smmu_master_free_smes(fwspec);
1480 arm_smmu_rpm_put(smmu);
1482 iommu_group_remove_device(dev);
1483 kfree(fwspec->iommu_priv);
1484 iommu_fwspec_free(dev);
/*
 * iommu_ops .device_group callback: if any of the device's stream-mapping
 * entries is already bound to a group, every entry must agree on the same
 * group (aliasing streams share translation); otherwise allocate a fresh
 * bus-appropriate group.
 */
1487 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1489 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1490 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1491 struct iommu_group *group = NULL;
1494 for_each_cfg_sme(fwspec, i, idx) {
1495 if (group && smmu->s2crs[idx].group &&
1496 group != smmu->s2crs[idx].group)
1497 return ERR_PTR(-EINVAL);
1499 group = smmu->s2crs[idx].group;
/* Reuse the existing group with an extra reference. */
1503 return iommu_group_ref_get(group);
1505 if (dev_is_pci(dev))
1506 group = pci_device_group(dev);
1507 else if (dev_is_fsl_mc(dev))
1508 group = fsl_mc_device_group(dev);
1510 group = generic_device_group(dev);
/*
 * iommu_ops .domain_get_attr callback: report NESTING for unmanaged
 * domains and the non-strict flush-queue flag for DMA domains.
 */
1515 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1516 enum iommu_attr attr, void *data)
1518 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1520 switch(domain->type) {
1521 case IOMMU_DOMAIN_UNMANAGED:
1523 case DOMAIN_ATTR_NESTING:
1524 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1530 case IOMMU_DOMAIN_DMA:
1532 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1533 *(int *)data = smmu_domain->non_strict;
/*
 * iommu_ops .domain_set_attr callback: select nested vs stage-1 translation
 * for unmanaged domains (only before the domain is finalised — a live
 * smmu_domain->smmu means it's too late), or toggle non-strict invalidation
 * on DMA domains. Serialised against domain init by init_mutex.
 */
1544 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1545 enum iommu_attr attr, void *data)
1548 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1550 mutex_lock(&smmu_domain->init_mutex);
1552 switch(domain->type) {
1553 case IOMMU_DOMAIN_UNMANAGED:
1555 case DOMAIN_ATTR_NESTING:
1556 if (smmu_domain->smmu) {
1562 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1564 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1570 case IOMMU_DOMAIN_DMA:
1572 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1573 smmu_domain->non_strict = *(int *)data;
1583 mutex_unlock(&smmu_domain->init_mutex);
/*
 * iommu_ops .of_xlate callback: pack the DT-provided stream ID (and optional
 * SMR mask, either as a second cell or from the "stream-match-mask"
 * property) into a single fwspec ID word.
 */
1587 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1591 if (args->args_count > 0)
1592 fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1594 if (args->args_count > 1)
1595 fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1596 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1597 fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1599 return iommu_fwspec_add_ids(dev, &fwid, 1);
1602 static void arm_smmu_get_resv_regions(struct device *dev,
1603 struct list_head *head)
1605 struct iommu_resv_region *region;
1606 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1608 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1609 prot, IOMMU_RESV_SW_MSI);
1613 list_add_tail(®ion->list, head);
1615 iommu_dma_get_resv_regions(dev, head);
1618 static void arm_smmu_put_resv_regions(struct device *dev,
1619 struct list_head *head)
1621 struct iommu_resv_region *entry, *next;
1623 list_for_each_entry_safe(entry, next, head, list)
/*
 * The iommu_ops vtable registered with the IOMMU core; each entry is one of
 * the callbacks defined above. pgsize_bitmap starts as all-ones and is
 * narrowed in arm_smmu_device_cfg_probe() once hardware page-size support
 * is known.
 */
1627 static struct iommu_ops arm_smmu_ops = {
1628 .capable = arm_smmu_capable,
1629 .domain_alloc = arm_smmu_domain_alloc,
1630 .domain_free = arm_smmu_domain_free,
1631 .attach_dev = arm_smmu_attach_dev,
1632 .map = arm_smmu_map,
1633 .unmap = arm_smmu_unmap,
1634 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1635 .iotlb_sync = arm_smmu_iotlb_sync,
1636 .iova_to_phys = arm_smmu_iova_to_phys,
1637 .add_device = arm_smmu_add_device,
1638 .remove_device = arm_smmu_remove_device,
1639 .device_group = arm_smmu_device_group,
1640 .domain_get_attr = arm_smmu_domain_get_attr,
1641 .domain_set_attr = arm_smmu_domain_set_attr,
1642 .of_xlate = arm_smmu_of_xlate,
1643 .get_resv_regions = arm_smmu_get_resv_regions,
1644 .put_resv_regions = arm_smmu_put_resv_regions,
1645 .pgsize_bitmap = -1UL, /* Restricted during device attach */
/*
 * Put the SMMU into a known-good state: clear fault status, invalidate
 * stream mappings and context banks, flush TLBs, then program sCR0 with the
 * driver's global policy. Called at probe and on runtime resume.
 */
1648 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1653 /* clear global FSR */
1654 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1655 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1658 * Reset stream mapping groups: Initial values mark all SMRn as
1659 * invalid and all S2CRn as bypass unless overridden.
1661 for (i = 0; i < smmu->num_mapping_groups; ++i)
1662 arm_smmu_write_sme(smmu, i);
1664 /* Make sure all context banks are disabled and clear CB_FSR */
1665 for (i = 0; i < smmu->num_context_banks; ++i) {
1666 arm_smmu_write_context_bank(smmu, i);
1667 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1670 /* Invalidate the TLB, just in case */
1671 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1672 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1674 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1676 /* Enable fault reporting */
1677 reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1678 ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1680 /* Disable TLB broadcasting. */
1681 reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1683 /* Enable client access, handling unmatched streams as appropriate */
1684 reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1686 reg |= ARM_SMMU_sCR0_USFCFG;
1688 reg &= ~ARM_SMMU_sCR0_USFCFG;
1690 /* Disable forced broadcasting */
1691 reg &= ~ARM_SMMU_sCR0_FB;
1693 /* Don't upgrade barriers */
1694 reg &= ~(ARM_SMMU_sCR0_BSU);
1696 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1697 reg |= ARM_SMMU_sCR0_VMID16EN;
1699 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1700 reg |= ARM_SMMU_sCR0_EXIDENABLE;
/* Give implementation-specific code (qcom, mmu-500, ...) its hook. */
1702 if (smmu->impl && smmu->impl->reset)
1703 smmu->impl->reset(smmu);
1705 /* Push the button */
1706 arm_smmu_tlb_sync_global(smmu);
1707 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
/*
 * Decode an ID-register size field into an address width in bits (body not
 * visible in this listing; callers feed it the IAS/OAS/UBS fields and store
 * the result as ipa_size/pa_size/va_size).
 */
1710 static int arm_smmu_id_size_to_bits(int size)
/*
 * Read the SMMU's ID registers and populate the arm_smmu_device with its
 * hardware configuration: translation stages, stream-matching resources,
 * context banks, address sizes and supported page-table formats.
 * Returns 0 on success or a negative errno.
 */
1729 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1733 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1736 dev_notice(smmu->dev, "probing hardware configuration...\n");
1737 dev_notice(smmu->dev, "SMMUv%d with:\n",
1738 smmu->version == ARM_SMMU_V2 ? 2 : 1);
/* ID0: translation stages, coherency, stream-matching resources. */
1741 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1743 /* Restrict available stages based on module parameter */
1744 if (force_stage == 1)
1745 id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1746 else if (force_stage == 2)
1747 id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1749 if (id & ARM_SMMU_ID0_S1TS) {
1750 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1751 dev_notice(smmu->dev, "\tstage 1 translation\n");
1754 if (id & ARM_SMMU_ID0_S2TS) {
1755 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1756 dev_notice(smmu->dev, "\tstage 2 translation\n");
1759 if (id & ARM_SMMU_ID0_NTS) {
1760 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1761 dev_notice(smmu->dev, "\tnested translation\n");
1764 if (!(smmu->features &
1765 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1766 dev_err(smmu->dev, "\tno translation support!\n");
1770 if ((id & ARM_SMMU_ID0_S1TS) &&
1771 ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1772 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1773 dev_notice(smmu->dev, "\taddress translation ops\n");
1777 * In order for DMA API calls to work properly, we must defer to what
1778 * the FW says about coherency, regardless of what the hardware claims.
1779 * Fortunately, this also opens up a workaround for systems where the
1780 * ID register value has ended up configured incorrectly.
1782 cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1783 if (cttw_fw || cttw_reg)
1784 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1785 cttw_fw ? "" : "non-");
1786 if (cttw_fw != cttw_reg)
1787 dev_notice(smmu->dev,
1788 "\t(IDR0.CTTW overridden by FW configuration)\n");
1790 /* Max. number of entries we have for stream matching/indexing */
1791 if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1792 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1795 size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1797 smmu->streamid_mask = size - 1;
1798 if (id & ARM_SMMU_ID0_SMS) {
1799 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1800 size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1803 "stream-matching supported, but no SMRs present!\n");
1807 /* Zero-initialised to mark as invalid */
1808 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1813 dev_notice(smmu->dev,
1814 "\tstream matching with %u register groups", size);
1816 /* s2cr->type == 0 means translation, so initialise explicitly */
1817 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1821 for (i = 0; i < size; i++)
1822 smmu->s2crs[i] = s2cr_init_val;
1824 smmu->num_mapping_groups = size;
1825 mutex_init(&smmu->stream_map_mutex);
1826 spin_lock_init(&smmu->global_sync_lock);
/* Pre-v2 hardware (or v2 without the opt-out bits) speaks AArch32 formats. */
1828 if (smmu->version < ARM_SMMU_V2 ||
1829 !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1830 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1831 if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1832 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
/* ID1: page size, register-map geometry, context bank counts. */
1836 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1837 smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1839 /* Check for size mismatch of SMMU address space from mapped region */
1840 size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1841 if (smmu->numpage != 2 * size << smmu->pgshift)
1843 "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1844 2 * size << smmu->pgshift, smmu->numpage);
1845 /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1846 smmu->numpage = size;
1848 smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1849 smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1850 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1851 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1854 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1855 smmu->num_context_banks, smmu->num_s2_context_banks);
1856 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1857 sizeof(*smmu->cbs), GFP_KERNEL);
/* ID2: input/output/virtual address sizes and AArch64 granule support. */
1862 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1863 size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1864 smmu->ipa_size = size;
1866 /* The output mask is also applied for bypass */
1867 size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1868 smmu->pa_size = size;
1870 if (id & ARM_SMMU_ID2_VMID16)
1871 smmu->features |= ARM_SMMU_FEAT_VMID16;
1874 * What the page table walker can address actually depends on which
1875 * descriptor format is in use, but since a) we don't know that yet,
1876 * and b) it can vary per context bank, this will have to do...
1878 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1880 "failed to set DMA mask for table walker\n");
1882 if (smmu->version < ARM_SMMU_V2) {
1883 smmu->va_size = smmu->ipa_size;
1884 if (smmu->version == ARM_SMMU_V1_64K)
1885 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1887 size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1888 smmu->va_size = arm_smmu_id_size_to_bits(size);
1889 if (id & ARM_SMMU_ID2_PTFS_4K)
1890 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1891 if (id & ARM_SMMU_ID2_PTFS_16K)
1892 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1893 if (id & ARM_SMMU_ID2_PTFS_64K)
1894 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1897 /* Now we've corralled the various formats, what'll it do? */
1898 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1899 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1900 if (smmu->features &
1901 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1902 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1903 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1904 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1905 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1906 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
/* First SMMU seeds the global ops bitmap; later ones may only widen it. */
1908 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1909 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1911 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1912 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1913 smmu->pgsize_bitmap);
1916 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1917 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1918 smmu->va_size, smmu->ipa_size);
1920 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1921 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1922 smmu->ipa_size, smmu->pa_size);
1924 if (smmu->impl && smmu->impl->cfg_probe)
1925 return smmu->impl->cfg_probe(smmu);
/*
 * Per-compatible match data: architecture version plus vendor
 * implementation quirk selector, with a helper macro to stamp out the
 * static instances referenced by the OF match table below.
 */
1930 struct arm_smmu_match_data {
1931 enum arm_smmu_arch_version version;
1932 enum arm_smmu_implementation model;
1935 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1936 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1938 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1939 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1940 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1941 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1942 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1943 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
/* Devicetree compatibles handled by this driver, mapped to match data. */
1945 static const struct of_device_id arm_smmu_of_match[] = {
1946 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1947 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1948 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1949 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1950 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1951 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1952 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1955 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
/*
 * Translate an ACPI IORT SMMU model code into the driver's version/model
 * pair, mirroring the DT match data above.
 */
1958 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1963 case ACPI_IORT_SMMU_V1:
1964 case ACPI_IORT_SMMU_CORELINK_MMU400:
1965 smmu->version = ARM_SMMU_V1;
1966 smmu->model = GENERIC_SMMU;
1968 case ACPI_IORT_SMMU_CORELINK_MMU401:
1969 smmu->version = ARM_SMMU_V1_64K;
1970 smmu->model = GENERIC_SMMU;
1972 case ACPI_IORT_SMMU_V2:
1973 smmu->version = ARM_SMMU_V2;
1974 smmu->model = GENERIC_SMMU;
1976 case ACPI_IORT_SMMU_CORELINK_MMU500:
1977 smmu->version = ARM_SMMU_V2;
1978 smmu->model = ARM_MMU500;
1980 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1981 smmu->version = ARM_SMMU_V2;
1982 smmu->model = CAVIUM_SMMUV2;
/*
 * ACPI probe path: pull version/model and coherency from the IORT node
 * attached as the platform device's platform data.
 */
1991 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1992 struct arm_smmu_device *smmu)
1994 struct device *dev = smmu->dev;
1995 struct acpi_iort_node *node =
1996 *(struct acpi_iort_node **)dev_get_platdata(dev);
1997 struct acpi_iort_smmu *iort_smmu;
2000 /* Retrieve SMMU1/2 specific data */
2001 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2003 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2007 /* Ignore the configuration access interrupt */
2008 smmu->num_global_irqs = 1;
2010 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2011 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
/* !CONFIG_ACPI stub: ACPI probing unavailable (body elided in listing). */
2016 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2017 struct arm_smmu_device *smmu)
/*
 * Devicetree probe path: read the global-interrupt count, resolve match
 * data, arbitrate between the legacy "mmu-masters" binding and the generic
 * binding (they are mutually exclusive system-wide), and record coherency.
 */
2023 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2024 struct arm_smmu_device *smmu)
2026 const struct arm_smmu_match_data *data;
2027 struct device *dev = &pdev->dev;
2028 bool legacy_binding;
2030 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2031 &smmu->num_global_irqs)) {
2032 dev_err(dev, "missing #global-interrupts property\n");
2036 data = of_device_get_match_data(dev);
2037 smmu->version = data->version;
2038 smmu->model = data->model;
2040 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2041 if (legacy_binding && !using_generic_binding) {
/* Warn once on the first legacy-binding SMMU. */
2042 if (!using_legacy_binding) {
2043 pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2044 IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2046 using_legacy_binding = true;
2047 } else if (!legacy_binding && !using_legacy_binding) {
2048 using_generic_binding = true;
2050 dev_err(dev, "not probing due to mismatched DT properties\n");
2054 if (of_dma_is_coherent(dev->of_node))
2055 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
/*
 * Install (or, with @ops == NULL, tear down) the SMMU's iommu_ops on every
 * bus type that may carry SMMU masters: platform, AMBA, PCI and fsl-mc.
 * On failure, unwinds the bus types already claimed.
 */
2060 static int arm_smmu_bus_init(struct iommu_ops *ops)
2064 /* Oh, for a proper bus abstraction */
2065 if (!iommu_present(&platform_bus_type)) {
2066 err = bus_set_iommu(&platform_bus_type, ops);
2070 #ifdef CONFIG_ARM_AMBA
2071 if (!iommu_present(&amba_bustype)) {
2072 err = bus_set_iommu(&amba_bustype, ops);
2074 goto err_reset_platform_ops;
2078 if (!iommu_present(&pci_bus_type)) {
2079 err = bus_set_iommu(&pci_bus_type, ops);
2081 goto err_reset_amba_ops;
2084 #ifdef CONFIG_FSL_MC_BUS
2085 if (!iommu_present(&fsl_mc_bus_type)) {
2086 err = bus_set_iommu(&fsl_mc_bus_type, ops);
2088 goto err_reset_pci_ops;
/* Unwind labels: __maybe_unused since some are compiled out. */
2093 err_reset_pci_ops: __maybe_unused;
2095 bus_set_iommu(&pci_bus_type, NULL);
2097 err_reset_amba_ops: __maybe_unused;
2098 #ifdef CONFIG_ARM_AMBA
2099 bus_set_iommu(&amba_bustype, NULL);
2101 err_reset_platform_ops: __maybe_unused;
2102 bus_set_iommu(&platform_bus_type, NULL);
/*
 * Platform-driver probe: allocate the arm_smmu_device, gather firmware
 * description (DT or ACPI), map registers, wire up IRQs and clocks, probe
 * the hardware configuration, register with the IOMMU core, reset the
 * hardware, and finally install the bus ops.
 */
2106 static int arm_smmu_device_probe(struct platform_device *pdev)
2108 struct resource *res;
2109 resource_size_t ioaddr;
2110 struct arm_smmu_device *smmu;
2111 struct device *dev = &pdev->dev;
2112 int num_irqs, i, err;
2114 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2116 dev_err(dev, "failed to allocate arm_smmu_device\n");
/* DT nodes take the DT path; everything else falls back to ACPI/IORT. */
2122 err = arm_smmu_device_dt_probe(pdev, smmu);
2124 err = arm_smmu_device_acpi_probe(pdev, smmu);
/* May substitute an implementation-specific wrapper around smmu. */
2129 smmu = arm_smmu_impl_init(smmu);
2131 return PTR_ERR(smmu);
2133 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2134 ioaddr = res->start;
2135 smmu->base = devm_ioremap_resource(dev, res);
2136 if (IS_ERR(smmu->base))
2137 return PTR_ERR(smmu->base);
2139 * The resource size should effectively match the value of SMMU_TOP;
2140 * stash that temporarily until we know PAGESIZE to validate it with.
2142 smmu->numpage = resource_size(res);
/* IRQs beyond the global ones are context-bank fault interrupts. */
2145 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2147 if (num_irqs > smmu->num_global_irqs)
2148 smmu->num_context_irqs++;
2151 if (!smmu->num_context_irqs) {
2152 dev_err(dev, "found %d interrupts but expected at least %d\n",
2153 num_irqs, smmu->num_global_irqs + 1);
2157 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2160 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2164 for (i = 0; i < num_irqs; ++i) {
2165 int irq = platform_get_irq(pdev, i);
2169 smmu->irqs[i] = irq;
2172 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2174 dev_err(dev, "failed to get clocks %d\n", err);
2177 smmu->num_clks = err;
2179 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2183 err = arm_smmu_device_cfg_probe(smmu);
2187 if (smmu->version == ARM_SMMU_V2) {
2188 if (smmu->num_context_banks > smmu->num_context_irqs) {
2190 "found only %d context irq(s) but %d required\n",
2191 smmu->num_context_irqs, smmu->num_context_banks);
2195 /* Ignore superfluous interrupts */
2196 smmu->num_context_irqs = smmu->num_context_banks;
2199 for (i = 0; i < smmu->num_global_irqs; ++i) {
2200 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2201 arm_smmu_global_fault,
2203 "arm-smmu global fault",
2206 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2212 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2213 "smmu.%pa", &ioaddr);
2215 dev_err(dev, "Failed to register iommu in sysfs\n");
2219 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2220 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2222 err = iommu_device_register(&smmu->iommu);
2224 dev_err(dev, "Failed to register iommu\n");
2228 platform_set_drvdata(pdev, smmu);
2229 arm_smmu_device_reset(smmu);
2230 arm_smmu_test_smr_masks(smmu);
2233 * We want to avoid touching dev->power.lock in fastpaths unless
2234 * it's really going to do something useful - pm_runtime_enabled()
2235 * can serve as an ideal proxy for that decision. So, conditionally
2236 * enable pm_runtime.
2238 if (dev->pm_domain) {
2239 pm_runtime_set_active(dev);
2240 pm_runtime_enable(dev);
2244 * For ACPI and generic DT bindings, an SMMU will be probed before
2245 * any device which might need it, so we want the bus ops in place
2246 * ready to handle default domain setup as soon as any SMMU exists.
2248 if (!using_legacy_binding)
2249 return arm_smmu_bus_init(&arm_smmu_ops);
/*
 * Platform-driver remove: detach bus ops, unregister from the IOMMU core,
 * disable client access in hardware, then stop runtime PM and clocks.
 */
2254 static int arm_smmu_device_remove(struct platform_device *pdev)
2256 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
/* A non-empty context map means live domains still reference us. */
2261 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2262 dev_err(&pdev->dev, "removing device with active domains!\n");
2264 arm_smmu_bus_init(NULL);
2265 iommu_device_unregister(&smmu->iommu);
2266 iommu_device_sysfs_remove(&smmu->iommu);
2268 arm_smmu_rpm_get(smmu);
2269 /* Turn the thing off */
2270 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2271 arm_smmu_rpm_put(smmu);
2273 if (pm_runtime_enabled(smmu->dev))
2274 pm_runtime_force_suspend(smmu->dev);
2276 clk_bulk_disable(smmu->num_clks, smmu->clks);
2278 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
/* Shutdown mirrors remove so DMA is quiesced before kexec/reboot. */
2282 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2284 arm_smmu_device_remove(pdev);
/*
 * Runtime-PM resume: re-enable clocks and re-run the hardware reset, since
 * register state may have been lost while powered down.
 */
2287 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2289 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2292 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2296 arm_smmu_device_reset(smmu);
/* Runtime-PM suspend: just gate the clocks; state is restored on resume. */
2301 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2303 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2305 clk_bulk_disable(smmu->num_clks, smmu->clks);
/* System resume: skip if runtime-suspended; otherwise reuse runtime resume. */
2310 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2312 if (pm_runtime_suspended(dev))
2315 return arm_smmu_runtime_resume(dev);
/* System suspend: skip if already runtime-suspended; else gate clocks. */
2318 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2320 if (pm_runtime_suspended(dev))
2323 return arm_smmu_runtime_suspend(dev);
/* System-sleep and runtime-PM callbacks bundled for the driver below. */
2326 static const struct dev_pm_ops arm_smmu_pm_ops = {
2327 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2328 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2329 arm_smmu_runtime_resume, NULL)
/*
 * Platform driver registration. suppress_bind_attrs prevents manual
 * unbinding via sysfs, since tearing down a live SMMU would break DMA for
 * every master behind it.
 */
2332 static struct platform_driver arm_smmu_driver = {
2335 .of_match_table = arm_smmu_of_match,
2336 .pm = &arm_smmu_pm_ops,
2337 .suppress_bind_attrs = true,
2339 .probe = arm_smmu_device_probe,
2340 .remove = arm_smmu_device_remove,
2341 .shutdown = arm_smmu_device_shutdown,
2343 module_platform_driver(arm_smmu_driver);
2345 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2346 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2347 MODULE_ALIAS("platform:arm-smmu");
2348 MODULE_LICENSE("GPL v2");