1 // SPDX-License-Identifier: GPL-2.0-only
3 * IOMMU API for ARM architected SMMU implementations.
5 * Copyright (C) 2013 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver currently supports:
10 * - SMMUv1 and v2 implementations
11 * - Stream-matching and stream-indexing
12 * - v7/v8 long-descriptor format
13 * - Non-secure access to the SMMU
14 * - Context fault reporting
15 * - Extended Stream ID (16 bit)
18 #define pr_fmt(fmt) "arm-smmu: " fmt
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/of_iommu.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/ratelimit.h>
39 #include <linux/slab.h>
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
47 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
48 * global register space are still, in fact, using a hypervisor to mediate it
49 * by trapping and emulating register accesses. Sadly, some deployed versions
50 * of said trapping code have bugs wherein they go horribly wrong for stores
51 * using r31 (i.e. XZR/WZR) as the source register.
/*
 * Driver-wide tunables and module parameters.
 *
 * QCOM_DUMMY_VAL: written where the register value is "don't care"; a
 * non-zero value is used because some Qualcomm hypervisor trap code
 * mishandles stores of XZR/WZR (see comment above).
 */
53 #define QCOM_DUMMY_VAL -1
/* TLB sync polling: spin TLB_SPIN_COUNT times, then back off with
 * exponentially growing delays up to ~1s total before declaring timeout. */
55 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
56 #define TLB_SPIN_COUNT 10
/* Fixed IOVA window reserved for MSI doorbells on this SMMU. */
58 #define MSI_IOVA_BASE 0x8000000
59 #define MSI_IOVA_LENGTH 0x100000
/* force_stage: debug knob to pin all domains to stage 1 or stage 2. */
61 static int force_stage;
62 module_param(force_stage, int, S_IRUGO);
63 MODULE_PARM_DESC(force_stage,
64 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
/* disable_bypass: when set, streams not attached to a domain fault
 * instead of bypassing translation. Default comes from Kconfig. */
65 static bool disable_bypass =
66 IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
67 module_param(disable_bypass, bool, S_IRUGO);
68 MODULE_PARM_DESC(disable_bypass,
69 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
/*
 * Software shadow of one Stream-to-Context Register (S2CR) entry.
 * NOTE(review): this excerpt is truncated — further members (e.g. a
 * refcount and context-bank index) and the closing brace are elided
 * from this listing.
 */
71 struct arm_smmu_s2cr {
72 struct iommu_group *group;
74 enum arm_smmu_s2cr_type type;
75 enum arm_smmu_s2cr_privcfg privcfg;
/* Reset value for an S2CR slot: fault or bypass per the module param. */
79 #define s2cr_init_val (struct arm_smmu_s2cr){ \
80 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
/* NOTE(review): orphan line below — presumably the tail of another
 * struct definition elided from this excerpt; confirm against the
 * full source. */
93 struct arm_smmu_cfg *cfg;
/* Per-master configuration hung off dev->iommu_fwspec->iommu_priv. */
96 struct arm_smmu_master_cfg {
97 struct arm_smmu_device *smmu;
/*
 * Per-master stream-map bookkeeping lives behind dev->iommu_fwspec:
 * iommu_priv points at the arm_smmu_master_cfg, whose smendx[] array
 * records which Stream Map Entry (SME) each of the fwspec's stream IDs
 * was allocated, or INVALID_SMENDX when none has been claimed yet.
 *
 * fwspec_smendx() returns the SME index for the i-th ID (bounds-checked
 * against fw->num_ids); for_each_cfg_sme() iterates (i, idx) pairs over
 * every ID of the fwspec.
 *
 * Fix: macro arguments are now parenthesised so that passing a non-trivial
 * expression (a cast, "i + 1", etc.) cannot mis-associate through operator
 * precedence. Expansion behaviour for all existing call sites (which pass
 * plain identifiers) is unchanged.
 */
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)(fw)->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	((i) >= (fw)->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < (fw)->num_ids; ++i)
/* Which DT binding style was seen at probe time (mutually informative,
 * not mutually exclusive flags). */
108 static bool using_legacy_binding, using_generic_binding;
/*
 * Take a runtime-PM reference on the SMMU's struct device, but only when
 * runtime PM is enabled for it; otherwise the hardware is assumed to be
 * always powered. NOTE(review): the closing "return 0;" and brace are
 * elided from this excerpt.
 */
110 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
112 if (pm_runtime_enabled(smmu->dev))
113 return pm_runtime_get_sync(smmu->dev);
/* Drop the runtime-PM reference taken above; uses the autosuspend
 * variant so rapid get/put cycles don't bounce the power state. */
118 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
120 if (pm_runtime_enabled(smmu->dev))
121 pm_runtime_put_autosuspend(smmu->dev);
/* Convert the generic iommu_domain handle to our embedding domain. */
124 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
126 return container_of(dom, struct arm_smmu_domain, domain);
/* Forward declarations used by the legacy-binding code below. */
129 static struct platform_driver arm_smmu_driver;
130 static struct iommu_ops arm_smmu_ops;
132 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
133 static int arm_smmu_bus_init(struct iommu_ops *ops);
/*
 * For the legacy "mmu-masters" DT binding: find the device_node that
 * carries the master's stream IDs. For PCI devices that is the host
 * bridge's parent (the binding predates per-function IDs).
 * NOTE(review): the loop body advancing "bus" up the hierarchy is
 * elided from this excerpt.
 */
135 static struct device_node *dev_get_dev_node(struct device *dev)
137 if (dev_is_pci(dev)) {
138 struct pci_bus *bus = to_pci_dev(dev)->bus;
140 while (!pci_is_root_bus(bus))
142 return of_node_get(bus->bridge->parent->of_node);
145 return of_node_get(dev->of_node);
/* pci_for_each_dma_alias() callback: record the (last) DMA alias as a
 * big-endian stream ID, mimicking a DT cell. */
148 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
150 *((__be32 *)data) = cpu_to_be32(alias);
151 return 0; /* Continue walking */
/*
 * driver_for_each_device() callback: walk this SMMU's "mmu-masters"
 * phandle list looking for the master node stashed in *data; on a match,
 * replace *data with the SMMU's struct device so the caller can find it.
 */
154 static int __find_legacy_master_phandle(struct device *dev, void *data)
156 struct of_phandle_iterator *it = *(void **)data;
157 struct device_node *np = it->node;
160 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
161 "#stream-id-cells", -1)
162 if (it->node == np) {
163 *(void **)data = dev;
/* -ENOENT just means the iterator ran off the end: not an error. */
167 return err == -ENOENT ? 0 : err;
/*
 * Resolve a master's SMMU and stream IDs from the legacy binding and
 * populate its iommu_fwspec accordingly. Returns 0 on success or a
 * negative errno. NOTE(review): several error-handling and cleanup
 * lines are elided from this excerpt.
 */
170 static int arm_smmu_register_legacy_master(struct device *dev,
171 struct arm_smmu_device **smmu)
173 struct device *smmu_dev;
174 struct device_node *np;
175 struct of_phandle_iterator it;
181 np = dev_get_dev_node(dev);
182 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
188 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
189 __find_legacy_master_phandle);
197 if (dev_is_pci(dev)) {
198 /* "mmu-masters" assumes Stream ID == Requester ID */
199 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
205 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
210 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
214 *smmu = dev_get_drvdata(smmu_dev);
215 of_phandle_iterator_args(&it, sids, it.cur_count);
216 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
222 * With the legacy DT binding in play, we have no guarantees about
223 * probe order, but then we're also not doing default domains, so we can
224 * delay setting bus ops until we're sure every possible SMMU is ready,
225 * and that way ensure that no add_device() calls get missed.
/*
 * Late initcall: with the legacy binding, bus ops are only installed
 * once every SMMU has had a chance to probe (see comment above), so
 * no add_device() calls are missed.
 */
227 static int arm_smmu_legacy_bus_init(void)
229 if (using_legacy_binding)
230 return arm_smmu_bus_init(&arm_smmu_ops);
233 device_initcall_sync(arm_smmu_legacy_bus_init);
/* Stub for kernels built without the legacy DT binding support.
 * NOTE(review): the "-ENODEV" body is elided from this excerpt. */
235 static int arm_smmu_register_legacy_master(struct device *dev,
236 struct arm_smmu_device **smmu)
240 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
/*
 * Atomically claim a free bit in [start, end) of a resource bitmap
 * (context banks, stream-map entries). find_next_zero_bit() is racy on
 * its own, so the result is confirmed with test_and_set_bit() and the
 * search retried on collision. NOTE(review): the enclosing do-loop
 * header, -ENOSPC check and return are elided from this excerpt.
 */
242 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
247 idx = find_next_zero_bit(map, end, start);
250 } while (test_and_set_bit(idx, map));
/* Release a bit previously claimed by __arm_smmu_alloc_bitmap(). */
255 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
260 /* Wait for any pending TLB invalidations to complete */
/*
 * Issue a TLB sync via the given sync register and poll the matching
 * status register until GSACTIVE clears. Spins TLB_SPIN_COUNT times
 * between exponentially growing udelays; gives up (and screams) after
 * ~TLB_LOOP_TIMEOUT. Implementations may override the whole sequence
 * via impl->tlb_sync. The written value is QCOM_DUMMY_VAL to dodge the
 * Qualcomm XZR-store erratum described at the top of the file.
 */
261 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
262 int sync, int status)
264 unsigned int spin_cnt, delay;
267 if (smmu->impl && unlikely(smmu->impl->tlb_sync))
268 return smmu->impl->tlb_sync(smmu, page, sync, status);
270 arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
271 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
272 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
273 reg = arm_smmu_readl(smmu, page, status);
274 if (!(reg & sTLBGSTATUS_GSACTIVE))
280 dev_err_ratelimited(smmu->dev,
281 "TLB sync timed out -- SMMU may be deadlocked\n");
/* Global TLB sync (GR0 registers), serialised by global_sync_lock. */
284 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
288 spin_lock_irqsave(&smmu->global_sync_lock, flags);
289 __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
290 ARM_SMMU_GR0_sTLBGSTATUS);
291 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
/* Per-context-bank TLB sync for a domain, serialised by cb_lock. */
294 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
296 struct arm_smmu_device *smmu = smmu_domain->smmu;
299 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
300 __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
301 ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
302 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
/*
 * Stage-1 flush-all: invalidate by ASID on the domain's context bank,
 * then sync. The cookie is the arm_smmu_domain (io-pgtable convention).
 */
305 static void arm_smmu_tlb_inv_context_s1(void *cookie)
307 struct arm_smmu_domain *smmu_domain = cookie;
309 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
310 * current CPU are visible beforehand.
313 arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
314 ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
315 arm_smmu_tlb_sync_context(smmu_domain);
/*
 * Stage-2 flush-all: TLBIVMID is a global (GR0) operation, so this
 * invalidates by VMID and uses the global sync.
 */
318 static void arm_smmu_tlb_inv_context_s2(void *cookie)
320 struct arm_smmu_domain *smmu_domain = cookie;
321 struct arm_smmu_device *smmu = smmu_domain->smmu;
325 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
326 arm_smmu_tlb_sync_global(smmu);
/*
 * Stage-1 invalidate-by-VA over [iova, iova + size) in granule steps,
 * using the TLBI register selected by @reg (TLBIVA/TLBIVAL).
 * AArch32-format contexts encode the ASID in the low bits of a 32-bit
 * VA op; AArch64-format contexts use a 64-bit op with ASID[63:48] and
 * VA[48:12] scaled by 4K (hence "granule >> 12").
 * NOTE(review): the wmb() for non-coherent walks and part of the
 * AArch32 loop are elided from this excerpt.
 */
329 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
330 size_t granule, void *cookie, int reg)
332 struct arm_smmu_domain *smmu_domain = cookie;
333 struct arm_smmu_device *smmu = smmu_domain->smmu;
334 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
335 int idx = cfg->cbndx;
337 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
340 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
341 iova = (iova >> 12) << 12;
344 arm_smmu_cb_write(smmu, idx, reg, iova);
346 } while (size -= granule);
349 iova |= (u64)cfg->asid << 48;
351 arm_smmu_cb_writeq(smmu, idx, reg, iova);
352 iova += granule >> 12;
353 } while (size -= granule);
/*
 * Stage-2 invalidate-by-IPA counterpart: address is always scaled by
 * 4K; op width (32/64-bit) follows the context format.
 */
357 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
358 size_t granule, void *cookie, int reg)
360 struct arm_smmu_domain *smmu_domain = cookie;
361 struct arm_smmu_device *smmu = smmu_domain->smmu;
362 int idx = smmu_domain->cfg.cbndx;
364 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
369 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
370 arm_smmu_cb_writeq(smmu, idx, reg, iova);
372 arm_smmu_cb_write(smmu, idx, reg, iova);
373 iova += granule >> 12;
374 } while (size -= granule);
/*
 * iommu_flush_ops adaptors. "walk" flushes cover intermediate (table)
 * levels, "leaf" flushes only last-level entries (the *L register
 * variants), and "add_page" queues a single leaf invalidation without
 * an immediate sync (the sync happens later via .iotlb_sync).
 */
377 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
378 size_t granule, void *cookie)
380 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
381 ARM_SMMU_CB_S1_TLBIVA);
382 arm_smmu_tlb_sync_context(cookie);
385 static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
386 size_t granule, void *cookie)
388 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
389 ARM_SMMU_CB_S1_TLBIVAL);
390 arm_smmu_tlb_sync_context(cookie);
393 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
394 unsigned long iova, size_t granule,
397 arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
398 ARM_SMMU_CB_S1_TLBIVAL);
401 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
402 size_t granule, void *cookie)
404 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
405 ARM_SMMU_CB_S2_TLBIIPAS2);
406 arm_smmu_tlb_sync_context(cookie);
409 static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
410 size_t granule, void *cookie)
412 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
413 ARM_SMMU_CB_S2_TLBIIPAS2L);
414 arm_smmu_tlb_sync_context(cookie);
417 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
418 unsigned long iova, size_t granule,
421 arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
422 ARM_SMMU_CB_S2_TLBIIPAS2L);
/* SMMUv1 stage-2 has no by-IPA ops: any range op degrades to a full
 * by-VMID invalidation. */
425 static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
426 size_t granule, void *cookie)
428 arm_smmu_tlb_inv_context_s2(cookie);
431 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
432 * almost negligible, but the benefit of getting the first one in as far ahead
433 * of the sync as possible is significant, hence we don't just make this a
434 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
437 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
438 unsigned long iova, size_t granule,
441 struct arm_smmu_domain *smmu_domain = cookie;
442 struct arm_smmu_device *smmu = smmu_domain->smmu;
444 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
447 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
/*
 * io-pgtable flush-op tables, selected per domain stage and SMMU
 * version in arm_smmu_init_domain_context(). NOTE(review): closing
 * "};" lines are elided from this excerpt.
 */
450 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
451 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
452 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
453 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
454 .tlb_add_page = arm_smmu_tlb_add_page_s1,
/* Stage 2 on SMMUv2: proper by-IPA range invalidation. */
457 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
458 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
459 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
460 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
461 .tlb_add_page = arm_smmu_tlb_add_page_s2,
/* Stage 2 on SMMUv1: everything collapses to by-VMID invalidation. */
464 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
465 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
466 .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
467 .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
468 .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
/*
 * Context-bank fault IRQ handler. dev is the iommu_domain registered in
 * arm_smmu_init_domain_context(). Reads the fault syndrome, logs it
 * (rate-limited), and clears FSR by writing the read value back.
 * Returns IRQ_NONE when no fault bit is set (shared IRQ line).
 */
471 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
473 u32 fsr, fsynr, cbfrsynra;
475 struct iommu_domain *domain = dev;
476 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
477 struct arm_smmu_device *smmu = smmu_domain->smmu;
478 int idx = smmu_domain->cfg.cbndx;
480 fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
481 if (!(fsr & FSR_FAULT))
484 fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
485 iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
486 cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
488 dev_err_ratelimited(smmu->dev,
489 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
490 fsr, iova, fsynr, cbfrsynra, idx);
/* Write-to-clear the fault status so the line can deassert. */
492 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
/*
 * Global fault IRQ handler. A USF (unidentified stream fault) with
 * bypass disabled gets a targeted hint message; anything else gets the
 * full syndrome dump. Rate-limited, and GFSR is write-to-clear.
 */
496 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
498 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
499 struct arm_smmu_device *smmu = dev;
500 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
501 DEFAULT_RATELIMIT_BURST);
503 gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
504 gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
505 gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
506 gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
511 if (__ratelimit(&rs)) {
512 if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
515 "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
519 "Unexpected global fault, this could be serious\n");
521 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
522 gfsr, gfsynr0, gfsynr1, gfsynr2);
/* Write-to-clear the global fault status. */
525 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
/*
 * Populate the software shadow of a context bank (TCR/TTBR/MAIR) from
 * the io-pgtable configuration. Nothing is written to hardware here;
 * arm_smmu_write_context_bank() commits the shadow later. The three
 * register layouts (AArch32-short, AArch32-long/AArch64 stage 1,
 * stage 2) are handled per cfg->fmt and stage.
 */
529 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
530 struct io_pgtable_cfg *pgtbl_cfg)
532 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
533 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
534 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
540 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
541 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
543 cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
544 cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
545 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
546 cb->tcr[1] |= TCR2_AS;
/* AArch32-long needs EAE set to select the LPAE descriptor format. */
548 cb->tcr[0] |= TCR_EAE;
551 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
556 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
557 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
/* Stage-1 LPAE: fold the ASID into both TTBRs. */
560 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
561 cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
562 cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid);
565 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
568 /* MAIRs (stage-1 only) */
570 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
/* v7s calls them PRRR/NMRR, but they occupy the MAIR slots. */
571 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
572 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
574 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
575 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
/*
 * Commit the shadow state of context bank @idx to hardware:
 * CBA2R/CBAR (bank attributes, VMID/IRPTNDX), then TCR(2), TTBRs,
 * MAIRs, and finally SCTLR to enable translation. A bank whose cfg
 * is NULL is simply disabled (SCTLR = 0).
 */
580 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
584 struct arm_smmu_cb *cb = &smmu->cbs[idx];
585 struct arm_smmu_cfg *cfg = cb->cfg;
587 /* Unassigned context banks only need disabling */
589 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
593 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
596 if (smmu->version > ARM_SMMU_V1) {
597 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
601 /* 16-bit VMIDs live in CBA2R */
602 if (smmu->features & ARM_SMMU_FEAT_VMID16)
603 reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid)
605 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
609 reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
/* SMMUv1 routes context interrupts via CBAR.IRPTNDX. */
610 if (smmu->version < ARM_SMMU_V2)
611 reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
614 * Use the weakest shareability/memory types, so they are
615 * overridden by the ttbcr/pte.
618 reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
619 FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
620 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
621 /* 8-bit VMIDs live in CBAR */
622 reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
624 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
628 * We must write this before the TTBRs, since it determines the
629 * access behaviour of some fields (in particular, ASID[15:8]).
631 if (stage1 && smmu->version > ARM_SMMU_V1)
632 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
633 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
636 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
/* v7s format: 32-bit TTBRs plus CONTEXTIDR carrying the ASID. */
637 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
638 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
639 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
641 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
643 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
647 /* MAIRs (stage-1 only) */
649 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
650 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
/* SCTLR last: fault reporting + access flag + TEX remap + enable. */
654 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
656 reg |= SCTLR_S1_ASIDPNE;
657 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
660 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
/*
 * Finalise a domain against a specific SMMU instance (idempotent under
 * init_mutex): choose stage and context format, allocate a context bank
 * (and SMMUv1 interrupt index), build the io-pgtable, program the bank,
 * and request the context fault IRQ. Identity domains short-circuit to
 * bypass with no context bank. Returns 0 or a negative errno.
 * NOTE(review): several lines (error labels, some assignments) are
 * elided from this excerpt.
 */
663 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
664 struct arm_smmu_device *smmu)
666 int irq, start, ret = 0;
667 unsigned long ias, oas;
668 struct io_pgtable_ops *pgtbl_ops;
669 struct io_pgtable_cfg pgtbl_cfg;
670 enum io_pgtable_fmt fmt;
671 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
672 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
674 mutex_lock(&smmu_domain->init_mutex);
/* Already finalised on some SMMU: nothing to do (checked under lock). */
675 if (smmu_domain->smmu)
678 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
679 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
680 smmu_domain->smmu = smmu;
685 * Mapping the requested stage onto what we support is surprisingly
686 * complicated, mainly because the spec allows S1+S2 SMMUs without
687 * support for nested translation. That means we end up with the
690 * Requested Supported Actual
700 * Note that you can't actually request stage-2 mappings.
702 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
703 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
704 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
705 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
708 * Choosing a suitable context format is even more fiddly. Until we
709 * grow some way for the caller to express a preference, and/or move
710 * the decision into the io-pgtable code where it arguably belongs,
711 * just aim for the closest thing to the rest of the system, and hope
712 * that the hardware isn't esoteric enough that we can't assume AArch64
713 * support to be a superset of AArch32 support...
715 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
716 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
717 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
718 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
719 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
720 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
721 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
722 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
723 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
724 ARM_SMMU_FEAT_FMT_AARCH64_16K |
725 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
726 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
/* No mutually acceptable format: bail out. */
728 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
733 switch (smmu_domain->stage) {
734 case ARM_SMMU_DOMAIN_S1:
/* Stage-1 banks sit above the stage-2 banks in the allocator range. */
735 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
736 start = smmu->num_s2_context_banks;
738 oas = smmu->ipa_size;
739 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
740 fmt = ARM_64_LPAE_S1;
741 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
742 fmt = ARM_32_LPAE_S1;
743 ias = min(ias, 32UL);
744 oas = min(oas, 40UL);
747 ias = min(ias, 32UL);
748 oas = min(oas, 32UL);
750 smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
752 case ARM_SMMU_DOMAIN_NESTED:
754 * We will likely want to change this if/when KVM gets
/* Nested is treated as plain stage-2 for now (fallthrough). */
757 case ARM_SMMU_DOMAIN_S2:
758 cfg->cbar = CBAR_TYPE_S2_TRANS;
760 ias = smmu->ipa_size;
762 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
763 fmt = ARM_64_LPAE_S2;
765 fmt = ARM_32_LPAE_S2;
766 ias = min(ias, 40UL);
767 oas = min(oas, 40UL);
769 if (smmu->version == ARM_SMMU_V2)
770 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
772 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
778 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
779 smmu->num_context_banks);
/* SMMUv1: context IRQs are a shared pool, assigned round-robin;
 * SMMUv2: one IRQ per context bank. */
784 if (smmu->version < ARM_SMMU_V2) {
785 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
786 cfg->irptndx %= smmu->num_context_irqs;
788 cfg->irptndx = cfg->cbndx;
791 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
/* VMID 0 is reserved, hence the +1. */
792 cfg->vmid = cfg->cbndx + 1;
794 cfg->asid = cfg->cbndx;
796 smmu_domain->smmu = smmu;
797 if (smmu->impl && smmu->impl->init_context) {
798 ret = smmu->impl->init_context(smmu_domain);
803 pgtbl_cfg = (struct io_pgtable_cfg) {
804 .pgsize_bitmap = smmu->pgsize_bitmap,
807 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
808 .tlb = smmu_domain->flush_ops,
809 .iommu_dev = smmu->dev,
812 if (smmu_domain->non_strict)
813 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
815 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
821 /* Update the domain's page sizes to reflect the page table format */
822 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
823 domain->geometry.aperture_end = (1UL << ias) - 1;
824 domain->geometry.force_aperture = true;
826 /* Initialise the context bank with our page table cfg */
827 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
828 arm_smmu_write_context_bank(smmu, cfg->cbndx);
831 * Request context fault interrupt. Do this last to avoid the
832 * handler seeing a half-initialised domain state.
834 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
835 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
836 IRQF_SHARED, "arm-smmu-context-fault", domain);
838 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
/* IRQ request failure is non-fatal: faults just go unreported. */
840 cfg->irptndx = INVALID_IRPTNDX;
843 mutex_unlock(&smmu_domain->init_mutex);
845 /* Publish page table ops for map/unmap */
846 smmu_domain->pgtbl_ops = pgtbl_ops;
/* Error unwind: release the context bank and detach from the SMMU. */
850 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
851 smmu_domain->smmu = NULL;
853 mutex_unlock(&smmu_domain->init_mutex);
/*
 * Tear down a finalised domain: disable its context bank, free the
 * context IRQ (if one was obtained), the io-pgtable, and the bank
 * itself. No-op for identity domains or domains never finalised.
 * Bracketed by a runtime-PM get/put so registers are accessible.
 */
857 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
859 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
860 struct arm_smmu_device *smmu = smmu_domain->smmu;
861 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
864 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
867 ret = arm_smmu_rpm_get(smmu);
872 * Disable the context bank and free the page tables before freeing
/* Clearing cbs[].cfg makes write_context_bank() disable the bank. */
875 smmu->cbs[cfg->cbndx].cfg = NULL;
876 arm_smmu_write_context_bank(smmu, cfg->cbndx);
878 if (cfg->irptndx != INVALID_IRPTNDX) {
879 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
880 devm_free_irq(smmu->dev, irq, domain);
883 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
884 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
886 arm_smmu_rpm_put(smmu);
/*
 * Allocate an arm_smmu_domain. Only UNMANAGED, DMA and IDENTITY types
 * are supported; DMA domains also get an iova cookie (unless the legacy
 * binding is in use, which predates default DMA domains). The domain is
 * not bound to any SMMU until a device attaches.
 */
889 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
891 struct arm_smmu_domain *smmu_domain;
893 if (type != IOMMU_DOMAIN_UNMANAGED &&
894 type != IOMMU_DOMAIN_DMA &&
895 type != IOMMU_DOMAIN_IDENTITY)
898 * Allocate the domain and initialise some of its data structures.
899 * We can't really do anything meaningful until we've added a
902 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
906 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
907 iommu_get_dma_cookie(&smmu_domain->domain))) {
912 mutex_init(&smmu_domain->init_mutex);
913 spin_lock_init(&smmu_domain->cb_lock);
915 return &smmu_domain->domain;
/*
 * Release a domain: drop the DMA cookie, undo any context-bank state,
 * then free the embedding structure.
 */
918 static void arm_smmu_domain_free(struct iommu_domain *domain)
920 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
923 * Free the domain resources. We assume that all devices have
924 * already been detached.
926 iommu_put_dma_cookie(domain);
927 arm_smmu_destroy_domain_context(domain);
/*
 * Commit stream-map entry @idx's SMR shadow to hardware. Without EXIDS
 * the valid bit lives in the SMR itself; with EXIDS it lives in the
 * S2CR (see below).
 */
931 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
933 struct arm_smmu_smr *smr = smmu->smrs + idx;
934 u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
936 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
938 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
/* Commit S2CR shadow @idx; carries EXIDVALID when EXIDS is in use. */
941 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
943 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
944 u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
945 FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
946 FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
948 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
949 smmu->smrs[idx].valid)
950 reg |= S2CR_EXIDVALID;
951 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
/* Write S2CR before SMR so the entry never matches while half-set-up. */
954 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
956 arm_smmu_write_s2cr(smmu, idx);
958 arm_smmu_write_smr(smmu, idx);
962 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
963 * should be called after sCR0 is written.
/*
 * Probe how many SMR.ID and SMR.MASK bits the hardware actually
 * implements by writing all-ones patterns to SMR(0) and reading back.
 */
965 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
973 * SMR.ID bits may not be preserved if the corresponding MASK
974 * bits are set, so check each one separately. We can reject
975 * masters later if they try to claim IDs outside these masks.
977 smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
978 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
979 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
980 smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
982 smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
983 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
984 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
985 smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
/*
 * Find a stream-map entry for (id, mask): either an existing SMR that
 * entirely covers the new one (reuse), or the first free slot. Returns
 * the index, -ENOSPC if full, or -EINVAL on a partial overlap (which
 * could alias a conflicting stream). Stream-indexing SMMUs (no smrs
 * array) use the ID directly — that early-return path is elided here.
 */
988 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
990 struct arm_smmu_smr *smrs = smmu->smrs;
991 int i, free_idx = -ENOSPC;
993 /* Stream indexing is blissfully easy */
997 /* Validating SMRs is... less so */
998 for (i = 0; i < smmu->num_mapping_groups; ++i) {
999 if (!smrs[i].valid) {
1001 * Note the first free entry we come across, which
1002 * we'll claim in the end if nothing else matches.
1009 * If the new entry is _entirely_ matched by an existing entry,
1010 * then reuse that, with the guarantee that there also cannot
1011 * be any subsequent conflicting entries. In normal use we'd
1012 * expect simply identical entries for this case, but there's
1013 * no harm in accommodating the generalisation.
1015 if ((mask & smrs[i].mask) == mask &&
1016 !((id ^ smrs[i].id) & ~smrs[i].mask))
1019 * If the new entry has any other overlap with an existing one,
1020 * though, then there always exists at least one stream ID
1021 * which would cause a conflict, and we can't allow that risk.
1023 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
/*
 * Drop a reference on SME @idx; on the last reference, reset the S2CR
 * to its bypass/fault default and invalidate the SMR. Returns true when
 * the entry was actually freed (caller then rewrites the hardware).
 */
1030 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1032 if (--smmu->s2crs[idx].count)
1035 smmu->s2crs[idx] = s2cr_init_val;
1037 smmu->smrs[idx].valid = false;
/*
 * Claim a stream-map entry for each of the device's stream IDs (under
 * stream_map_mutex), join/create its IOMMU group, then program the
 * hardware entries. On any failure, already-claimed entries are
 * released (unwind path at the bottom). NOTE(review): some error-path
 * lines are elided from this excerpt.
 */
1042 static int arm_smmu_master_alloc_smes(struct device *dev)
1044 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1045 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1046 struct arm_smmu_device *smmu = cfg->smmu;
1047 struct arm_smmu_smr *smrs = smmu->smrs;
1048 struct iommu_group *group;
1051 mutex_lock(&smmu->stream_map_mutex);
1052 /* Figure out a viable stream map entry allocation */
1053 for_each_cfg_sme(fwspec, i, idx) {
/* The fwspec ID packs SMR id and mask into one 32-bit word. */
1054 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1055 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
/* Already claimed (e.g. duplicate ID): that's a caller bug. */
1057 if (idx != INVALID_SMENDX) {
1062 ret = arm_smmu_find_sme(smmu, sid, mask);
/* First user of this slot: populate the SMR shadow. */
1067 if (smrs && smmu->s2crs[idx].count == 0) {
1069 smrs[idx].mask = mask;
1070 smrs[idx].valid = true;
1072 smmu->s2crs[idx].count++;
1073 cfg->smendx[i] = (s16)idx;
1076 group = iommu_group_get_for_dev(dev);
1077 if (IS_ERR(group)) {
1078 ret = PTR_ERR(group);
1081 iommu_group_put(group);
1083 /* It worked! Now, poke the actual hardware */
1084 for_each_cfg_sme(fwspec, i, idx) {
1085 arm_smmu_write_sme(smmu, idx);
1086 smmu->s2crs[idx].group = group;
1089 mutex_unlock(&smmu->stream_map_mutex);
/* Error unwind: release everything claimed so far. */
1094 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1095 cfg->smendx[i] = INVALID_SMENDX;
1097 mutex_unlock(&smmu->stream_map_mutex);
/*
 * Release all of a master's stream-map entries, rewriting hardware for
 * any that were actually freed.
 */
1101 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1103 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1104 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1107 mutex_lock(&smmu->stream_map_mutex);
1108 for_each_cfg_sme(fwspec, i, idx) {
1109 if (arm_smmu_free_sme(smmu, idx))
1110 arm_smmu_write_sme(smmu, idx);
1111 cfg->smendx[i] = INVALID_SMENDX;
1113 mutex_unlock(&smmu->stream_map_mutex);
/*
 * Point all of a master's S2CRs at the domain's context bank (TRANS),
 * or set them to BYPASS for identity domains. Entries already in the
 * desired state are skipped to avoid redundant register writes.
 */
1116 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1117 struct iommu_fwspec *fwspec)
1119 struct arm_smmu_device *smmu = smmu_domain->smmu;
1120 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1121 u8 cbndx = smmu_domain->cfg.cbndx;
1122 enum arm_smmu_s2cr_type type;
1125 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1126 type = S2CR_TYPE_BYPASS;
1128 type = S2CR_TYPE_TRANS;
1130 for_each_cfg_sme(fwspec, i, idx) {
1131 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1134 s2cr[idx].type = type;
1135 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1136 s2cr[idx].cbndx = cbndx;
1137 arm_smmu_write_s2cr(smmu, idx);
/*
 * iommu_ops.attach_dev: finalise the domain on this device's SMMU (if
 * not already), verify the domain isn't bound to a different SMMU, and
 * steer the device's streams at the domain's context bank. Wrapped in
 * runtime-PM get/put; also configures autosuspend so per-buffer unmap
 * traffic doesn't thrash the power state.
 */
1142 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1145 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1146 struct arm_smmu_device *smmu;
1147 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1149 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1150 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1155 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1156 * domains between of_xlate() and add_device() - we have no way to cope
1157 * with that, so until ARM gets converted to rely on groups and default
1158 * domains, just say no (but more politely than by dereferencing NULL).
1159 * This should be at least a WARN_ON once that's sorted.
1161 if (!fwspec->iommu_priv)
1164 smmu = fwspec_smmu(fwspec);
1166 ret = arm_smmu_rpm_get(smmu);
1170 /* Ensure that the domain is finalised */
1171 ret = arm_smmu_init_domain_context(domain, smmu);
1176 * Sanity check the domain. We don't support domains across
1179 if (smmu_domain->smmu != smmu) {
1181 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1182 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1187 /* Looks ok, so add the device to the domain */
1188 ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1191 * Setup an autosuspend delay to avoid bouncing runpm state.
1192 * Otherwise, if a driver for a suspended consumer device
1193 * unmaps buffers, it will runpm resume/suspend for each one.
1195 * For example, when used by a GPU device, when an application
1196 * or game exits, it can trigger unmapping 100s or 1000s of
1197 * buffers. With a runpm cycle for each buffer, that adds up
1198 * to 5-10sec worth of reprogramming the context bank, while
1199 * the system appears to be locked up to the user.
1201 pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1202 pm_runtime_use_autosuspend(smmu->dev);
1205 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops.map: delegate to the domain's io-pgtable ops, holding a
 * runtime-PM reference so any TLB maintenance hits powered hardware.
 * NOTE(review): the NULL-ops guard and return are elided from this
 * excerpt.
 */
1209 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1210 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1212 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1213 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1219 arm_smmu_rpm_get(smmu);
1220 ret = ops->map(ops, iova, paddr, size, prot);
1221 arm_smmu_rpm_put(smmu);
/* iommu_ops.unmap: same delegation pattern as map, returning the
 * number of bytes actually unmapped. */
1226 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1227 size_t size, struct iommu_iotlb_gather *gather)
1229 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1230 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1236 arm_smmu_rpm_get(smmu);
1237 ret = ops->unmap(ops, iova, size, gather);
1238 arm_smmu_rpm_put(smmu);
/*
 * Invalidate the entire TLB for this domain, if the domain has been
 * finalised with a set of flush_ops (powered via runtime PM for the
 * duration of the register writes).
 */
1243 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1245 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1246 struct arm_smmu_device *smmu = smmu_domain->smmu;
1248 if (smmu_domain->flush_ops) {
1249 arm_smmu_rpm_get(smmu);
1250 smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1251 arm_smmu_rpm_put(smmu);
/*
 * Wait for queued TLB invalidations to complete. SMMUv2, or any stage-1
 * domain, can sync at context-bank scope; otherwise a global sync through
 * GR0 is required.
 */
1255 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1256 struct iommu_iotlb_gather *gather)
1258 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1259 struct arm_smmu_device *smmu = smmu_domain->smmu;
1264 arm_smmu_rpm_get(smmu);
1265 if (smmu->version == ARM_SMMU_V2 ||
1266 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1267 arm_smmu_tlb_sync_context(smmu_domain);
/* NOTE(review): the 'else' keyword for the fallback branch is elided here. */
1269 arm_smmu_tlb_sync_global(smmu);
1270 arm_smmu_rpm_put(smmu);
/*
 * Resolve an IOVA using the hardware ATS1PR address-translation operation
 * in the domain's context bank. Falls back to a software page-table walk
 * if the ATSR poll times out. The returned PA combines bits [39:12] of
 * PAR with the low 12 bits (4K page offset) of the IOVA.
 */
1273 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1276 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1277 struct arm_smmu_device *smmu = smmu_domain->smmu;
1278 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1279 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1280 struct device *dev = smmu->dev;
1284 unsigned long va, flags;
1285 int ret, idx = cfg->cbndx;
1287 ret = arm_smmu_rpm_get(smmu);
/* cb_lock serialises use of the per-context ATS registers. */
1291 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1292 va = iova & ~0xfffUL;
/* 64-bit VA write for AArch64-format contexts, 32-bit otherwise. */
1293 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1294 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1296 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
/* Poll ATSR.ACTIVE to clear: 5us per read, 50us total budget. */
1298 reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1299 if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
1300 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1302 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1304 return ops->iova_to_phys(ops, iova);
1307 phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1308 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
/* PAR.F set means the hardware translation faulted. */
1309 if (phys & CB_PAR_F) {
1310 dev_err(dev, "translation fault!\n");
1311 dev_err(dev, "PAR = 0x%llx\n", phys);
1315 arm_smmu_rpm_put(smmu);
1317 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
/*
 * iommu_ops iova_to_phys entry point: identity domains translate 1:1;
 * stage-1 domains use the hardware ATOS path when the SMMU advertises
 * translation ops, otherwise the io-pgtable software walk is used.
 */
1320 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1323 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1324 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1326 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1332 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1333 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1334 return arm_smmu_iova_to_phys_hard(domain, iova);
1336 return ops->iova_to_phys(ops, iova);
/* Report IOMMU capabilities (cache coherency, no-exec) to the core. */
1339 static bool arm_smmu_capable(enum iommu_cap cap)
1342 case IOMMU_CAP_CACHE_COHERENCY:
1344 * Return true here as the SMMU can always send out coherent
1348 case IOMMU_CAP_NOEXEC:
/*
 * Look up an already-probed SMMU instance by its firmware node handle,
 * returning its driver data or NULL if no matching device is bound.
 */
1356 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1358 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1361 return dev ? dev_get_drvdata(dev) : NULL;
/*
 * add_device callback: bind a master device to its SMMU. Resolves the
 * SMMU either via the legacy "mmu-masters" binding or via the device's
 * fwspec, validates every stream ID/mask against the hardware limits,
 * allocates the per-master cfg and stream-match entries, and links the
 * device to the SMMU for sysfs and runtime-PM purposes.
 */
1364 static int arm_smmu_add_device(struct device *dev)
1366 struct arm_smmu_device *smmu;
1367 struct arm_smmu_master_cfg *cfg;
1368 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1371 if (using_legacy_binding) {
1372 ret = arm_smmu_register_legacy_master(dev, &smmu);
1375 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1376 * will allocate/initialise a new one. Thus we need to update fwspec for
1379 fwspec = dev_iommu_fwspec_get(dev);
1382 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1383 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
/* Reject any ID whose SID or SMR mask exceeds what the hardware supports. */
1389 for (i = 0; i < fwspec->num_ids; i++) {
1390 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1391 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1393 if (sid & ~smmu->streamid_mask) {
1394 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1395 sid, smmu->streamid_mask);
1398 if (mask & ~smmu->smr_mask_mask) {
1399 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1400 mask, smmu->smr_mask_mask);
/* Flexible-array sizing: one smendx slot per fwspec ID. */
1406 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1412 fwspec->iommu_priv = cfg;
1414 cfg->smendx[i] = INVALID_SMENDX;
1416 ret = arm_smmu_rpm_get(smmu);
1420 ret = arm_smmu_master_alloc_smes(dev);
1421 arm_smmu_rpm_put(smmu);
1426 iommu_device_link(&smmu->iommu, dev);
/* Make the SMMU a PM supplier of the master device. */
1428 device_link_add(dev, smmu->dev,
1429 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
/* NOTE(review): error-path labels are elided; this frees the fwspec on failure. */
1436 iommu_fwspec_free(dev);
/*
 * remove_device callback: undo arm_smmu_add_device() — release the
 * stream-match entries, unlink from sysfs, drop the device from its
 * group, and free the per-master cfg and fwspec.
 */
1440 static void arm_smmu_remove_device(struct device *dev)
1442 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1443 struct arm_smmu_master_cfg *cfg;
1444 struct arm_smmu_device *smmu;
/* Ignore devices that were never ours. */
1447 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1450 cfg = fwspec->iommu_priv;
1453 ret = arm_smmu_rpm_get(smmu);
1457 iommu_device_unlink(&smmu->iommu, dev);
1458 arm_smmu_master_free_smes(fwspec);
1460 arm_smmu_rpm_put(smmu);
1462 iommu_group_remove_device(dev);
1463 kfree(fwspec->iommu_priv);
1464 iommu_fwspec_free(dev);
/*
 * device_group callback: devices whose stream IDs alias onto S2CRs that
 * already belong to a group must share that group; conflicting group
 * membership across the device's SMEs is an error. Otherwise fall back
 * to the standard per-bus grouping helpers.
 */
1467 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1469 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1470 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1471 struct iommu_group *group = NULL;
1474 for_each_cfg_sme(fwspec, i, idx) {
1475 if (group && smmu->s2crs[idx].group &&
1476 group != smmu->s2crs[idx].group)
1477 return ERR_PTR(-EINVAL);
1479 group = smmu->s2crs[idx].group;
/* Reuse an existing group found via the S2CRs, taking a reference. */
1483 return iommu_group_ref_get(group);
1485 if (dev_is_pci(dev))
1486 group = pci_device_group(dev);
1487 else if (dev_is_fsl_mc(dev))
1488 group = fsl_mc_device_group(dev);
1490 group = generic_device_group(dev);
/*
 * Read a domain attribute: NESTING for unmanaged domains (true when the
 * domain was configured for nested translation), non-strict flush-queue
 * mode for DMA domains.
 */
1495 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1496 enum iommu_attr attr, void *data)
1498 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1500 switch(domain->type) {
1501 case IOMMU_DOMAIN_UNMANAGED:
1503 case DOMAIN_ATTR_NESTING:
1504 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1510 case IOMMU_DOMAIN_DMA:
1512 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1513 *(int *)data = smmu_domain->non_strict;
/*
 * Write a domain attribute under init_mutex. NESTING can only be changed
 * before the domain is attached to an SMMU (smmu_domain->smmu still NULL);
 * DMA domains may toggle non-strict invalidation.
 */
1524 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1525 enum iommu_attr attr, void *data)
1528 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1530 mutex_lock(&smmu_domain->init_mutex);
1532 switch(domain->type) {
1533 case IOMMU_DOMAIN_UNMANAGED:
1535 case DOMAIN_ATTR_NESTING:
/* Too late to change the stage once a domain context exists. */
1536 if (smmu_domain->smmu) {
1542 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1544 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1550 case IOMMU_DOMAIN_DMA:
1552 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1553 smmu_domain->non_strict = *(int *)data;
1563 mutex_unlock(&smmu_domain->init_mutex);
/*
 * of_xlate callback: pack the DT cell(s) into a single fwspec ID —
 * cell 0 is the stream ID, cell 1 (or the "stream-match-mask" property)
 * the SMR mask — and record it against the device.
 */
1567 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1571 if (args->args_count > 0)
1572 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1574 if (args->args_count > 1)
1575 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1576 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1577 fwid |= FIELD_PREP(SMR_MASK, mask);
1579 return iommu_fwspec_add_ids(dev, &fwid, 1);
/*
 * get_resv_regions callback: report reserved IOVA ranges for @dev —
 * the fixed software-MSI window (MSI_IOVA_BASE..+MSI_IOVA_LENGTH) plus
 * whatever the DMA layer knows about.
 */
1582 static void arm_smmu_get_resv_regions(struct device *dev,
1583 struct list_head *head)
1585 struct iommu_resv_region *region;
1586 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1588 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1589 prot, IOMMU_RESV_SW_MSI);
/* Fixed encoding corruption: "&region" had been mangled into a (R) glyph. */
1593 list_add_tail(&region->list, head);
1595 iommu_dma_get_resv_regions(dev, head);
/* Free every reserved region previously returned by get_resv_regions. */
1598 static void arm_smmu_put_resv_regions(struct device *dev,
1599 struct list_head *head)
1601 struct iommu_resv_region *entry, *next;
1603 list_for_each_entry_safe(entry, next, head, list)
/* iommu_ops vtable wiring this driver into the IOMMU core. */
1607 static struct iommu_ops arm_smmu_ops = {
1608 .capable = arm_smmu_capable,
1609 .domain_alloc = arm_smmu_domain_alloc,
1610 .domain_free = arm_smmu_domain_free,
1611 .attach_dev = arm_smmu_attach_dev,
1612 .map = arm_smmu_map,
1613 .unmap = arm_smmu_unmap,
1614 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1615 .iotlb_sync = arm_smmu_iotlb_sync,
1616 .iova_to_phys = arm_smmu_iova_to_phys,
1617 .add_device = arm_smmu_add_device,
1618 .remove_device = arm_smmu_remove_device,
1619 .device_group = arm_smmu_device_group,
1620 .domain_get_attr = arm_smmu_domain_get_attr,
1621 .domain_set_attr = arm_smmu_domain_set_attr,
1622 .of_xlate = arm_smmu_of_xlate,
1623 .get_resv_regions = arm_smmu_get_resv_regions,
1624 .put_resv_regions = arm_smmu_put_resv_regions,
/* -1UL placeholder; narrowed in cfg_probe once hardware formats are known. */
1625 .pgsize_bitmap = -1UL, /* Restricted during device attach */
/*
 * Bring the SMMU to a known state: clear fault status, reset all stream
 * mapping and context-bank state, flush the TLBs, then build and commit
 * the global sCR0 configuration.
 */
1628 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1633 /* clear global FSR */
1634 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1635 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1638 * Reset stream mapping groups: Initial values mark all SMRn as
1639 * invalid and all S2CRn as bypass unless overridden.
1641 for (i = 0; i < smmu->num_mapping_groups; ++i)
1642 arm_smmu_write_sme(smmu, i);
1644 /* Make sure all context banks are disabled and clear CB_FSR */
1645 for (i = 0; i < smmu->num_context_banks; ++i) {
1646 arm_smmu_write_context_bank(smmu, i);
1647 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
1650 /* Invalidate the TLB, just in case */
1651 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1652 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1654 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1656 /* Enable fault reporting */
1657 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1659 /* Disable TLB broadcasting. */
1660 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1662 /* Enable client access, handling unmatched streams as appropriate */
1663 reg &= ~sCR0_CLIENTPD;
1667 reg &= ~sCR0_USFCFG;
1669 /* Disable forced broadcasting */
1672 /* Don't upgrade barriers */
1675 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1676 reg |= sCR0_VMID16EN;
1678 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1679 reg |= sCR0_EXIDENABLE;
/* Give implementation-specific code (e.g. MMU-500) a reset hook. */
1681 if (smmu->impl && smmu->impl->reset)
1682 smmu->impl->reset(smmu);
1684 /* Push the button */
1685 arm_smmu_tlb_sync_global(smmu);
1686 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
/*
 * Decode an ID-register address-size field into a bit width (used below
 * for IAS/OAS/UBS). Body elided in this excerpt.
 */
1689 static int arm_smmu_id_size_to_bits(int size)
/*
 * Probe the SMMU's ID registers (ID0/ID1/ID2) to discover translation
 * stages, stream-matching resources, context banks, address sizes and
 * page-table formats, populating smmu->features and allocating the
 * SMR/S2CR/context-bank bookkeeping arrays. Called once at device probe.
 */
1708 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1712 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1715 dev_notice(smmu->dev, "probing hardware configuration...\n");
1716 dev_notice(smmu->dev, "SMMUv%d with:\n",
1717 smmu->version == ARM_SMMU_V2 ? 2 : 1);
/* ID0: translation stages, coherency, stream-matching geometry. */
1720 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1722 /* Restrict available stages based on module parameter */
1723 if (force_stage == 1)
1724 id &= ~(ID0_S2TS | ID0_NTS);
1725 else if (force_stage == 2)
1726 id &= ~(ID0_S1TS | ID0_NTS);
1728 if (id & ID0_S1TS) {
1729 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1730 dev_notice(smmu->dev, "\tstage 1 translation\n");
1733 if (id & ID0_S2TS) {
1734 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1735 dev_notice(smmu->dev, "\tstage 2 translation\n");
1739 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1740 dev_notice(smmu->dev, "\tnested translation\n");
1743 if (!(smmu->features &
1744 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1745 dev_err(smmu->dev, "\tno translation support!\n");
1749 if ((id & ID0_S1TS) &&
1750 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1751 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1752 dev_notice(smmu->dev, "\taddress translation ops\n");
1756 * In order for DMA API calls to work properly, we must defer to what
1757 * the FW says about coherency, regardless of what the hardware claims.
1758 * Fortunately, this also opens up a workaround for systems where the
1759 * ID register value has ended up configured incorrectly.
1761 cttw_reg = !!(id & ID0_CTTW);
1762 if (cttw_fw || cttw_reg)
1763 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1764 cttw_fw ? "" : "non-");
1765 if (cttw_fw != cttw_reg)
1766 dev_notice(smmu->dev,
1767 "\t(IDR0.CTTW overridden by FW configuration)\n");
1769 /* Max. number of entries we have for stream matching/indexing */
1770 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1771 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1774 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1776 smmu->streamid_mask = size - 1;
1778 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1779 size = FIELD_GET(ID0_NUMSMRG, id);
1782 "stream-matching supported, but no SMRs present!\n");
1786 /* Zero-initialised to mark as invalid */
1787 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1792 dev_notice(smmu->dev,
1793 "\tstream matching with %u register groups", size);
1795 /* s2cr->type == 0 means translation, so initialise explicitly */
1796 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1800 for (i = 0; i < size; i++)
1801 smmu->s2crs[i] = s2cr_init_val;
1803 smmu->num_mapping_groups = size;
1804 mutex_init(&smmu->stream_map_mutex);
1805 spin_lock_init(&smmu->global_sync_lock);
1807 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1808 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1809 if (!(id & ID0_PTFS_NO_AARCH32S))
1810 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
/* ID1: page size, number of pages, context-bank counts. */
1814 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1815 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1817 /* Check for size mismatch of SMMU address space from mapped region */
1818 size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1819 if (smmu->numpage != 2 * size << smmu->pgshift)
1821 "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1822 2 * size << smmu->pgshift, smmu->numpage);
1823 /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1824 smmu->numpage = size;
1826 smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1827 smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1828 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1829 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1832 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1833 smmu->num_context_banks, smmu->num_s2_context_banks);
1834 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1835 sizeof(*smmu->cbs), GFP_KERNEL);
/* ID2: input/output address sizes, VMID16, page-table formats. */
1840 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1841 size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
1842 smmu->ipa_size = size;
1844 /* The output mask is also applied for bypass */
1845 size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
1846 smmu->pa_size = size;
1848 if (id & ID2_VMID16)
1849 smmu->features |= ARM_SMMU_FEAT_VMID16;
1852 * What the page table walker can address actually depends on which
1853 * descriptor format is in use, but since a) we don't know that yet,
1854 * and b) it can vary per context bank, this will have to do...
1856 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1858 "failed to set DMA mask for table walker\n");
1860 if (smmu->version < ARM_SMMU_V2) {
1861 smmu->va_size = smmu->ipa_size;
1862 if (smmu->version == ARM_SMMU_V1_64K)
1863 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1865 size = FIELD_GET(ID2_UBS, id);
1866 smmu->va_size = arm_smmu_id_size_to_bits(size);
1867 if (id & ID2_PTFS_4K)
1868 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1869 if (id & ID2_PTFS_16K)
1870 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1871 if (id & ID2_PTFS_64K)
1872 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1875 /* Now we've corralled the various formats, what'll it do? */
1876 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1877 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1878 if (smmu->features &
1879 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1880 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1881 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1882 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1883 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1884 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
/* First SMMU sets the global bitmap; later ones can only widen it. */
1886 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1887 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1889 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1890 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1891 smmu->pgsize_bitmap);
1894 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1895 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1896 smmu->va_size, smmu->ipa_size);
1898 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1899 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1900 smmu->ipa_size, smmu->pa_size);
1902 if (smmu->impl && smmu->impl->cfg_probe)
1903 return smmu->impl->cfg_probe(smmu);
/* Per-compatible probe data: architecture version plus implementation. */
1908 struct arm_smmu_match_data {
1909 enum arm_smmu_arch_version version;
1910 enum arm_smmu_implementation model;
/* Helper to declare one const match_data instance per supported part. */
1913 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1914 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1916 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1917 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1918 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1919 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1920 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1921 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
/* DT compatible strings mapped to their match data. */
1923 static const struct of_device_id arm_smmu_of_match[] = {
1924 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1925 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1926 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1927 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1928 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1929 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1930 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1933 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
/*
 * Translate an ACPI IORT SMMU model code into the driver's
 * version/implementation pair.
 */
1936 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1941 case ACPI_IORT_SMMU_V1:
1942 case ACPI_IORT_SMMU_CORELINK_MMU400:
1943 smmu->version = ARM_SMMU_V1;
1944 smmu->model = GENERIC_SMMU;
1946 case ACPI_IORT_SMMU_CORELINK_MMU401:
1947 smmu->version = ARM_SMMU_V1_64K;
1948 smmu->model = GENERIC_SMMU;
1950 case ACPI_IORT_SMMU_V2:
1951 smmu->version = ARM_SMMU_V2;
1952 smmu->model = GENERIC_SMMU;
1954 case ACPI_IORT_SMMU_CORELINK_MMU500:
1955 smmu->version = ARM_SMMU_V2;
1956 smmu->model = ARM_MMU500;
1958 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1959 smmu->version = ARM_SMMU_V2;
1960 smmu->model = CAVIUM_SMMUV2;
/*
 * ACPI probe path: pull model and coherency information out of the IORT
 * node supplied via platform data. The configuration-access interrupt
 * is deliberately not counted as a context interrupt.
 */
1969 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1970 struct arm_smmu_device *smmu)
1972 struct device *dev = smmu->dev;
1973 struct acpi_iort_node *node =
1974 *(struct acpi_iort_node **)dev_get_platdata(dev);
1975 struct acpi_iort_smmu *iort_smmu;
1978 /* Retrieve SMMU1/2 specific data */
1979 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1981 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1985 /* Ignore the configuration access interrupt */
1986 smmu->num_global_irqs = 1;
1988 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1989 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
/* !CONFIG_ACPI stub (body elided in this excerpt). */
1994 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1995 struct arm_smmu_device *smmu)
/*
 * DT probe path: read #global-interrupts, match data (version/model),
 * and arbitrate between the deprecated "mmu-masters" legacy binding and
 * the generic binding — the two cannot be mixed in one system.
 */
2001 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2002 struct arm_smmu_device *smmu)
2004 const struct arm_smmu_match_data *data;
2005 struct device *dev = &pdev->dev;
2006 bool legacy_binding;
2008 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2009 &smmu->num_global_irqs)) {
2010 dev_err(dev, "missing #global-interrupts property\n");
2014 data = of_device_get_match_data(dev);
2015 smmu->version = data->version;
2016 smmu->model = data->model;
2018 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2019 if (legacy_binding && !using_generic_binding) {
2020 if (!using_legacy_binding) {
2021 pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2022 IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2024 using_legacy_binding = true;
2025 } else if (!legacy_binding && !using_legacy_binding) {
2026 using_generic_binding = true;
/* Mixed legacy/generic bindings across SMMUs: refuse to probe. */
2028 dev_err(dev, "not probing due to mismatched DT properties\n");
2032 if (of_dma_is_coherent(dev->of_node))
2033 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
/*
 * Install (or, with ops == NULL, remove) the SMMU iommu_ops on every bus
 * type we may serve: platform, AMBA, PCI and fsl-mc. On failure, unwind
 * the buses already claimed via the labelled error path.
 */
2038 static int arm_smmu_bus_init(struct iommu_ops *ops)
2042 /* Oh, for a proper bus abstraction */
2043 if (!iommu_present(&platform_bus_type)) {
2044 err = bus_set_iommu(&platform_bus_type, ops);
2048 #ifdef CONFIG_ARM_AMBA
2049 if (!iommu_present(&amba_bustype)) {
2050 err = bus_set_iommu(&amba_bustype, ops);
2052 goto err_reset_platform_ops;
2056 if (!iommu_present(&pci_bus_type)) {
2057 err = bus_set_iommu(&pci_bus_type, ops);
2059 goto err_reset_amba_ops;
2062 #ifdef CONFIG_FSL_MC_BUS
2063 if (!iommu_present(&fsl_mc_bus_type)) {
2064 err = bus_set_iommu(&fsl_mc_bus_type, ops);
2066 goto err_reset_pci_ops;
/* Error unwind: strip ops from buses in reverse order of installation. */
2071 err_reset_pci_ops: __maybe_unused;
2073 bus_set_iommu(&pci_bus_type, NULL);
2075 err_reset_amba_ops: __maybe_unused;
2076 #ifdef CONFIG_ARM_AMBA
2077 bus_set_iommu(&amba_bustype, NULL);
2079 err_reset_platform_ops: __maybe_unused;
2080 bus_set_iommu(&platform_bus_type, NULL);
/*
 * Platform-driver probe: allocate the device structure, run the DT or
 * ACPI sub-probe, map registers, collect IRQs and clocks, discover the
 * hardware configuration, register with the IOMMU core, reset the
 * hardware, and finally install the bus ops (generic bindings only).
 */
2084 static int arm_smmu_device_probe(struct platform_device *pdev)
2086 struct resource *res;
2087 resource_size_t ioaddr;
2088 struct arm_smmu_device *smmu;
2089 struct device *dev = &pdev->dev;
2090 int num_irqs, i, err;
2092 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2094 dev_err(dev, "failed to allocate arm_smmu_device\n");
/* Firmware-specific probing: DT when an of_node exists, else ACPI/IORT. */
2100 err = arm_smmu_device_dt_probe(pdev, smmu);
2102 err = arm_smmu_device_acpi_probe(pdev, smmu);
/* May substitute an implementation-specific wrapper structure. */
2107 smmu = arm_smmu_impl_init(smmu);
2109 return PTR_ERR(smmu);
2111 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2112 ioaddr = res->start;
2113 smmu->base = devm_ioremap_resource(dev, res);
2114 if (IS_ERR(smmu->base))
2115 return PTR_ERR(smmu->base);
2117 * The resource size should effectively match the value of SMMU_TOP;
2118 * stash that temporarily until we know PAGESIZE to validate it with.
2120 smmu->numpage = resource_size(res);
/* Count IRQs; everything beyond the global ones is a context IRQ. */
2123 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2125 if (num_irqs > smmu->num_global_irqs)
2126 smmu->num_context_irqs++;
2129 if (!smmu->num_context_irqs) {
2130 dev_err(dev, "found %d interrupts but expected at least %d\n",
2131 num_irqs, smmu->num_global_irqs + 1);
2135 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2138 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2142 for (i = 0; i < num_irqs; ++i) {
2143 int irq = platform_get_irq(pdev, i);
2147 smmu->irqs[i] = irq;
2150 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2152 dev_err(dev, "failed to get clocks %d\n", err);
2155 smmu->num_clks = err;
2157 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2161 err = arm_smmu_device_cfg_probe(smmu);
2165 if (smmu->version == ARM_SMMU_V2) {
2166 if (smmu->num_context_banks > smmu->num_context_irqs) {
2168 "found only %d context irq(s) but %d required\n",
2169 smmu->num_context_irqs, smmu->num_context_banks);
2173 /* Ignore superfluous interrupts */
2174 smmu->num_context_irqs = smmu->num_context_banks;
2177 for (i = 0; i < smmu->num_global_irqs; ++i) {
2178 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2179 arm_smmu_global_fault,
2181 "arm-smmu global fault",
2184 dev_err(dev, "failed to request global IRQ %d (%u)\n",
/* Register with the IOMMU core (sysfs name derived from MMIO address). */
2190 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2191 "smmu.%pa", &ioaddr);
2193 dev_err(dev, "Failed to register iommu in sysfs\n");
2197 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2198 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2200 err = iommu_device_register(&smmu->iommu);
2202 dev_err(dev, "Failed to register iommu\n");
2206 platform_set_drvdata(pdev, smmu);
2207 arm_smmu_device_reset(smmu);
2208 arm_smmu_test_smr_masks(smmu);
2211 * We want to avoid touching dev->power.lock in fastpaths unless
2212 * it's really going to do something useful - pm_runtime_enabled()
2213 * can serve as an ideal proxy for that decision. So, conditionally
2214 * enable pm_runtime.
2216 if (dev->pm_domain) {
2217 pm_runtime_set_active(dev);
2218 pm_runtime_enable(dev);
2222 * For ACPI and generic DT bindings, an SMMU will be probed before
2223 * any device which might need it, so we want the bus ops in place
2224 * ready to handle default domain setup as soon as any SMMU exists.
2226 if (!using_legacy_binding)
2227 return arm_smmu_bus_init(&arm_smmu_ops);
/*
 * Driver remove: warn if domains are still live, tear down bus ops and
 * IOMMU-core registration, disable client access via sCR0.CLIENTPD,
 * then shut down runtime PM and the clocks.
 */
2232 static int arm_smmu_device_remove(struct platform_device *pdev)
2234 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2239 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2240 dev_err(&pdev->dev, "removing device with active domains!\n");
2242 arm_smmu_bus_init(NULL);
2243 iommu_device_unregister(&smmu->iommu);
2244 iommu_device_sysfs_remove(&smmu->iommu);
2246 arm_smmu_rpm_get(smmu);
2247 /* Turn the thing off */
2248 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
2249 arm_smmu_rpm_put(smmu);
2251 if (pm_runtime_enabled(smmu->dev))
2252 pm_runtime_force_suspend(smmu->dev);
/* NOTE(review): the 'else' pairing these two clk calls is elided here. */
2254 clk_bulk_disable(smmu->num_clks, smmu->clks);
2256 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
/* Shutdown shares the remove path so DMA is quiesced at reboot/kexec. */
2260 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2262 arm_smmu_device_remove(pdev);
/* Runtime resume: re-enable clocks, then re-run the hardware reset. */
2265 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2267 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2270 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2274 arm_smmu_device_reset(smmu);
/* Runtime suspend: just gate the clocks (state is rebuilt on resume). */
2279 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2281 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2283 clk_bulk_disable(smmu->num_clks, smmu->clks);
/* System resume: skip if runtime-suspended, else reuse runtime resume. */
2288 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2290 if (pm_runtime_suspended(dev))
2293 return arm_smmu_runtime_resume(dev);
/* System suspend: skip if already runtime-suspended, else gate clocks. */
2296 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2298 if (pm_runtime_suspended(dev))
2301 return arm_smmu_runtime_suspend(dev);
/* Power-management callbacks: system sleep plus runtime PM. */
2304 static const struct dev_pm_ops arm_smmu_pm_ops = {
2305 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2306 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2307 arm_smmu_runtime_resume, NULL)
/*
 * Platform-driver registration and module metadata. suppress_bind_attrs
 * prevents manual unbind via sysfs.
 */
2310 static struct platform_driver arm_smmu_driver = {
2313 .of_match_table = arm_smmu_of_match,
2314 .pm = &arm_smmu_pm_ops,
2315 .suppress_bind_attrs = true,
2317 .probe = arm_smmu_device_probe,
2318 .remove = arm_smmu_device_remove,
2319 .shutdown = arm_smmu_device_shutdown,
2321 module_platform_driver(arm_smmu_driver);
2323 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2324 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2325 MODULE_ALIAS("platform:arm-smmu");
2326 MODULE_LICENSE("GPL v2");