// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu.h"
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
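
/*
 * Note: this fixed IOVA window is what arm_smmu_get_resv_regions() below
 * reports to the core code as a software MSI region (IOMMU_RESV_SW_MSI),
 * so that MSI doorbells can be remapped without clashing with normal DMA.
 */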
static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
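
/*
 * Note the comma operator in the loop condition above: idx is refreshed from
 * fwspec_smendx() before each i < fw->num_ids test, so the loop body always
 * sees the SME index for the current ID, e.g.:
 *
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx)
 *		arm_smmu_write_sme(smmu, idx);
 */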
static bool using_legacy_binding, using_generic_binding;

static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put_autosuspend(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
#ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
static int arm_smmu_bus_init(struct iommu_ops *ops);

static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", -1)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
/*
 * With the legacy DT binding in play, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		return arm_smmu_bus_init(&arm_smmu_ops);
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);
#else
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	return -ENODEV;
}
#endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				int sync, int status)
{
	unsigned int spin_cnt, delay;
	u32 reg;

	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
		return smmu->impl->tlb_sync(smmu, page, sync, status);

	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			reg = arm_smmu_readl(smmu, page, status);
			if (!(reg & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
			    ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	/*
	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
	 * current CPU are visible beforehand.
	 */
	wmb();
	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
	arm_smmu_tlb_sync_context(smmu_domain);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	/* See above */
	wmb();
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int idx = cfg->cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
		iova = (iova >> 12) << 12;
		iova |= cfg->asid;
		do {
			arm_smmu_cb_write(smmu, idx, reg, iova);
			iova += granule;
		} while (size -= granule);
	} else {
		iova >>= 12;
		iova |= (u64)cfg->asid << 48;
		do {
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
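
/*
 * The encodings above mirror the architected TLBIVA register layouts (to the
 * best of our reading of the SMMU spec): the AArch32 formats take VA[31:12]
 * with the ASID in the low bits, while the AArch64 format takes VA >> 12
 * with the ASID in bits [63:48] of a 64-bit write.
 */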
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	iova >>= 12;
	do {
		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
		else
			arm_smmu_cb_write(smmu, idx, reg, iova);
		iova += granule >> 12;
	} while (size -= granule);
}
static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVA);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
}

static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
}
static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
				       size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_context_s2(cookie);
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
 * think.
 */
static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
					unsigned long iova, size_t granule,
					void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf_s1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf_s2,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_any_s2_v1,
	.tlb_flush_leaf	= arm_smmu_tlb_inv_any_s2_v1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
};
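
/*
 * The v1 ops above fall back to invalidating the whole VMID because, as far
 * as we know, per-page stage-2 invalidation (TLBIIPAS2/TLBIIPAS2L) is an
 * SMMUv2 addition and is not available on SMMUv1 implementations.
 */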
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
	return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	if (__ratelimit(&rs)) {
		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
		    (gfsr & sGFSR_USF))
			dev_err(smmu->dev,
				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
				(u16)gfsynr1);
		else
			dev_err(smmu->dev,
				"Unexpected global fault, this could be serious\n");
		dev_err(smmu->dev,
			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			gfsr, gfsynr0, gfsynr1, gfsynr2);
	}

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
		return;
	}

	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_VA64;
		else
			reg = 0;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);

		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
	}

	/* CBAR */
	reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
	if (smmu->version < ARM_SMMU_V2)
		reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
			FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
	}
	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);

	/*
	 * TCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
	} else {
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		if (stage1)
			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
					   cb->ttbr[1]);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            N
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1;
	else
		cfg->asid = cfg->cbndx;

	smmu_domain->smmu = smmu;
	if (smmu->impl && smmu->impl->init_context) {
		ret = smmu->impl->init_context(smmu_domain);
		if (ret)
			goto out_unlock;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->flush_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
		  FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
		  FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	u32 smr;

	if (!smmu->smrs)
		return;
	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = FIELD_GET(SMR_ID, smr);

	smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
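
/*
 * A worked example of the rules above (values purely illustrative): with an
 * existing entry {id = 0x400, mask = 0xff} covering streams 0x400-0x4ff, a
 * new {id = 0x420, mask = 0x1f} is entirely contained, since
 * (0x1f & 0xff) == 0x1f and the IDs agree outside the existing mask, so the
 * existing index is reused; a new {id = 0x4f0, mask = 0x3ff} merely overlaps
 * the existing entry without being contained by it, so it is rejected with
 * -EINVAL.
 */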
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}
static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

	/*
	 * Setup an autosuspend delay to avoid bouncing runpm state.
	 * Otherwise, if a driver for a suspended consumer device
	 * unmaps buffers, it will runpm resume/suspend for each one.
	 *
	 * For example, when used by a GPU device, when an application
	 * or game exits, it can trigger unmapping 100s or 1000s of
	 * buffers. With a runpm cycle for each buffer, that adds up
	 * to 5-10sec worth of reprogramming the context bank, while
	 * the system appears to be locked up to the user.
	 */
	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
	pm_runtime_use_autosuspend(smmu->dev);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size, struct iommu_iotlb_gather *gather)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size, gather);
	arm_smmu_rpm_put(smmu);

	return ret;
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->flush_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (!smmu)
		return;

	arm_smmu_rpm_get(smmu);
	if (smmu->version == ARM_SMMU_V2 ||
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		arm_smmu_tlb_sync_context(smmu_domain);
	else
		arm_smmu_tlb_sync_global(smmu);
	arm_smmu_rpm_put(smmu);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *reg;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret, idx = cfg->cbndx;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	va = iova & ~0xfffUL;
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);

	arm_smmu_rpm_put(smmu);

	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= FIELD_PREP(SMR_ID, args->args[0]);

	if (args->args_count > 1)
		fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= FIELD_PREP(SMR_MASK, mask);

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
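
/*
 * For example (illustrative DT fragment only), a master using the generic
 * binding with:
 *
 *	iommus = <&smmu 0x400 0x3f>;
 *
 * ends up with fwid = FIELD_PREP(SMR_ID, 0x400) | FIELD_PREP(SMR_MASK, 0x3f),
 * i.e. a single fwspec ID carrying both the stream ID and the match mask.
 */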
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int i;
	u32 reg;

	/* clear global FSR */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/* Make sure all context banks are disabled and clear CB_FSR */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		arm_smmu_write_context_bank(smmu, i);
		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
	}

	/* Invalidate the TLB, just in case */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);

	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	if (smmu->impl && smmu->impl->reset)
		smmu->impl->reset(smmu);

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
}
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned int size;
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << FIELD_GET(ID0_NUMSIDB, id);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = FIELD_GET(ID0_NUMSMRG, id);
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
	if (smmu->numpage != 2 * size << smmu->pgshift)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
			2 * size << smmu->pgshift, smmu->numpage);
	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
	smmu->numpage = size;

	smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
	smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = FIELD_GET(ID2_UBS, id);
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	if (smmu->impl && smmu->impl->cfg_probe)
		return smmu->impl->cfg_probe(smmu);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding) {
			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
		}
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
static int arm_smmu_bus_init(struct iommu_ops *ops)
{
	int err;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type)) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			return err;
	}
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype)) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_platform_ops;
	}
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type)) {
		err = bus_set_iommu(&fsl_mc_bus_type, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	return 0;

err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
err_reset_amba_ops: __maybe_unused;
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_platform_ops: __maybe_unused;
	bus_set_iommu(&platform_bus_type, NULL);
	return err;
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

	if (err)
		return err;

	smmu = arm_smmu_impl_init(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	/*
	 * The resource size should effectively match the value of SMMU_TOP;
	 * stash that temporarily until we know PAGESIZE to validate it with.
	 */
	smmu->numpage = resource_size(res);

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0)
			return -ENODEV;
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
			      "found only %d context irq(s) but %d required\n",
			      smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		return arm_smmu_bus_init(&arm_smmu_ops);

	return 0;
}
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_bus_init(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_ALIAS("platform:arm-smmu");
MODULE_LICENSE("GPL v2");