1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *      - SMMUv1 and v2 implementations
11  *      - Stream-matching and stream-indexing
12  *      - v7/v8 long-descriptor format
13  *      - Non-secure access to the SMMU
14  *      - Context fault reporting
15  *      - Extended Stream ID (16 bit)
16  */
17
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/atomic.h>
23 #include <linux/bitfield.h>
24 #include <linux/delay.h>
25 #include <linux/dma-iommu.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/err.h>
28 #include <linux/interrupt.h>
29 #include <linux/io.h>
30 #include <linux/io-64-nonatomic-hi-lo.h>
31 #include <linux/io-pgtable.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/init.h>
35 #include <linux/moduleparam.h>
36 #include <linux/of.h>
37 #include <linux/of_address.h>
38 #include <linux/of_device.h>
39 #include <linux/of_iommu.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
42 #include <linux/pm_runtime.h>
43 #include <linux/slab.h>
44 #include <linux/spinlock.h>
45
46 #include <linux/amba/bus.h>
47 #include <linux/fsl/mc.h>
48
49 #include "arm-smmu-regs.h"
50
51 /*
52  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
53  * global register space are still, in fact, using a hypervisor to mediate it
54  * by trapping and emulating register accesses. Sadly, some deployed versions
55  * of said trapping code have bugs wherein they go horribly wrong for stores
56  * using r31 (i.e. XZR/WZR) as the source register.
57  */
58 #define QCOM_DUMMY_VAL -1
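/*
 * Illustration (grounded in the TLB sync code below): the sync path issues
 *
 *     writel_relaxed(QCOM_DUMMY_VAL, sync);
 *
 * rather than writing a literal 0, so the compiler never materialises the
 * store from XZR/WZR and the broken hypervisor trap described above is not
 * tickled. Any value would do; -1 is simply guaranteed to be non-zero.
 */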
59
60 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
61
62 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
63 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
64 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
65
66 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
67 #define TLB_SPIN_COUNT                  10
68
69 /* Maximum number of context banks per SMMU */
70 #define ARM_SMMU_MAX_CBS                128
71
72 /* SMMU global address space */
73 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
74 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
75
76 /*
77  * SMMU global address space with conditional offset to access secure
78  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
79  * nsGFSYNR0: 0x450)
80  */
81 #define ARM_SMMU_GR0_NS(smmu)                                           \
82         ((smmu)->base +                                                 \
83                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
84                         ? 0x400 : 0))
85
86 /* Translation context bank */
87 #define ARM_SMMU_CB(smmu, n)    ((smmu)->base + (((smmu)->numpage + (n)) << (smmu)->pgshift))
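/*
 * Worked example (numbers purely illustrative): with a 4KB register page
 * size (pgshift = 12) and numpage = 128, context bank 2 is addressed at
 * base + ((128 + 2) << 12) = base + 0x82000, i.e. the context banks start
 * numpage pages above the SMMU base and each bank occupies one page.
 */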
88
89 #define MSI_IOVA_BASE                   0x8000000
90 #define MSI_IOVA_LENGTH                 0x100000
91
92 static int force_stage;
93 /*
94  * not really modular, but the easiest way to keep compat with existing
95  * bootargs behaviour is to continue using module_param() here.
96  */
97 module_param(force_stage, int, S_IRUGO);
98 MODULE_PARM_DESC(force_stage,
99         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
100 static bool disable_bypass =
101         IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
102 module_param(disable_bypass, bool, S_IRUGO);
103 MODULE_PARM_DESC(disable_bypass,
104         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
105
106 enum arm_smmu_arch_version {
107         ARM_SMMU_V1,
108         ARM_SMMU_V1_64K,
109         ARM_SMMU_V2,
110 };
111
112 enum arm_smmu_implementation {
113         GENERIC_SMMU,
114         ARM_MMU500,
115         CAVIUM_SMMUV2,
116         QCOM_SMMUV2,
117 };
118
119 struct arm_smmu_s2cr {
120         struct iommu_group              *group;
121         int                             count;
122         enum arm_smmu_s2cr_type         type;
123         enum arm_smmu_s2cr_privcfg      privcfg;
124         u8                              cbndx;
125 };
126
127 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
128         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
129 }
130
131 struct arm_smmu_smr {
132         u16                             mask;
133         u16                             id;
134         bool                            valid;
135 };
136
137 struct arm_smmu_cb {
138         u64                             ttbr[2];
139         u32                             tcr[2];
140         u32                             mair[2];
141         struct arm_smmu_cfg             *cfg;
142 };
143
144 struct arm_smmu_master_cfg {
145         struct arm_smmu_device          *smmu;
146         s16                             smendx[];
147 };
148 #define INVALID_SMENDX                  -1
149 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
150 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
151 #define fwspec_smendx(fw, i) \
152         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
153 #define for_each_cfg_sme(fw, i, idx) \
154         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
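/*
 * Usage sketch (hypothetical caller, modelled on the allocation/teardown
 * code further down): walk a master's stream map entries, where idx reads
 * back as INVALID_SMENDX for any ID that has not yet been given an SME:
 *
 *     for_each_cfg_sme(fwspec, i, idx) {
 *             if (idx == INVALID_SMENDX)
 *                     continue;       // entry i not allocated yet
 *             arm_smmu_write_sme(smmu, idx);
 *     }
 */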
155
156 struct arm_smmu_device {
157         struct device                   *dev;
158
159         void __iomem                    *base;
160         unsigned int                    numpage;
161         unsigned int                    pgshift;
162
163 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
164 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
165 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
166 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
167 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
168 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
169 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
170 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
171 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
172 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
173 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
174 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
175 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
176         u32                             features;
177
178 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
179         u32                             options;
180         enum arm_smmu_arch_version      version;
181         enum arm_smmu_implementation    model;
182
183         u32                             num_context_banks;
184         u32                             num_s2_context_banks;
185         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
186         struct arm_smmu_cb              *cbs;
187         atomic_t                        irptndx;
188
189         u32                             num_mapping_groups;
190         u16                             streamid_mask;
191         u16                             smr_mask_mask;
192         struct arm_smmu_smr             *smrs;
193         struct arm_smmu_s2cr            *s2crs;
194         struct mutex                    stream_map_mutex;
195
196         unsigned long                   va_size;
197         unsigned long                   ipa_size;
198         unsigned long                   pa_size;
199         unsigned long                   pgsize_bitmap;
200
201         u32                             num_global_irqs;
202         u32                             num_context_irqs;
203         unsigned int                    *irqs;
204         struct clk_bulk_data            *clks;
205         int                             num_clks;
206
207         u32                             cavium_id_base; /* Specific to Cavium */
208
209         spinlock_t                      global_sync_lock;
210
211         /* IOMMU core code handle */
212         struct iommu_device             iommu;
213 };
214
215 enum arm_smmu_context_fmt {
216         ARM_SMMU_CTX_FMT_NONE,
217         ARM_SMMU_CTX_FMT_AARCH64,
218         ARM_SMMU_CTX_FMT_AARCH32_L,
219         ARM_SMMU_CTX_FMT_AARCH32_S,
220 };
221
222 struct arm_smmu_cfg {
223         u8                              cbndx;
224         u8                              irptndx;
225         union {
226                 u16                     asid;
227                 u16                     vmid;
228         };
229         enum arm_smmu_cbar_type         cbar;
230         enum arm_smmu_context_fmt       fmt;
231 };
232 #define INVALID_IRPTNDX                 0xff
233
234 enum arm_smmu_domain_stage {
235         ARM_SMMU_DOMAIN_S1 = 0,
236         ARM_SMMU_DOMAIN_S2,
237         ARM_SMMU_DOMAIN_NESTED,
238         ARM_SMMU_DOMAIN_BYPASS,
239 };
240
241 struct arm_smmu_domain {
242         struct arm_smmu_device          *smmu;
243         struct io_pgtable_ops           *pgtbl_ops;
244         const struct iommu_gather_ops   *tlb_ops;
245         struct arm_smmu_cfg             cfg;
246         enum arm_smmu_domain_stage      stage;
247         bool                            non_strict;
248         struct mutex                    init_mutex; /* Protects smmu pointer */
249         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
250         struct iommu_domain             domain;
251 };
252
253 struct arm_smmu_option_prop {
254         u32 opt;
255         const char *prop;
256 };
257
258 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
259
260 static bool using_legacy_binding, using_generic_binding;
261
262 static struct arm_smmu_option_prop arm_smmu_options[] = {
263         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
264         { 0, NULL},
265 };
266
267 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
268 {
269         if (pm_runtime_enabled(smmu->dev))
270                 return pm_runtime_get_sync(smmu->dev);
271
272         return 0;
273 }
274
275 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
276 {
277         if (pm_runtime_enabled(smmu->dev))
278                 pm_runtime_put(smmu->dev);
279 }
280
281 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
282 {
283         return container_of(dom, struct arm_smmu_domain, domain);
284 }
285
286 static void parse_driver_options(struct arm_smmu_device *smmu)
287 {
288         int i = 0;
289
290         do {
291                 if (of_property_read_bool(smmu->dev->of_node,
292                                                 arm_smmu_options[i].prop)) {
293                         smmu->options |= arm_smmu_options[i].opt;
294                         dev_notice(smmu->dev, "option %s\n",
295                                 arm_smmu_options[i].prop);
296                 }
297         } while (arm_smmu_options[++i].opt);
298 }
299
300 static struct device_node *dev_get_dev_node(struct device *dev)
301 {
302         if (dev_is_pci(dev)) {
303                 struct pci_bus *bus = to_pci_dev(dev)->bus;
304
305                 while (!pci_is_root_bus(bus))
306                         bus = bus->parent;
307                 return of_node_get(bus->bridge->parent->of_node);
308         }
309
310         return of_node_get(dev->of_node);
311 }
312
313 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
314 {
315         *((__be32 *)data) = cpu_to_be32(alias);
316         return 0; /* Continue walking */
317 }
318
319 static int __find_legacy_master_phandle(struct device *dev, void *data)
320 {
321         struct of_phandle_iterator *it = *(void **)data;
322         struct device_node *np = it->node;
323         int err;
324
325         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
326                             "#stream-id-cells", 0)
327                 if (it->node == np) {
328                         *(void **)data = dev;
329                         return 1;
330                 }
331         it->node = np;
332         return err == -ENOENT ? 0 : err;
333 }
334
335 static struct platform_driver arm_smmu_driver;
336 static struct iommu_ops arm_smmu_ops;
337
338 static int arm_smmu_register_legacy_master(struct device *dev,
339                                            struct arm_smmu_device **smmu)
340 {
341         struct device *smmu_dev;
342         struct device_node *np;
343         struct of_phandle_iterator it;
344         void *data = &it;
345         u32 *sids;
346         __be32 pci_sid;
347         int err;
348
349         np = dev_get_dev_node(dev);
350         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
351                 of_node_put(np);
352                 return -ENODEV;
353         }
354
355         it.node = np;
356         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
357                                      __find_legacy_master_phandle);
358         smmu_dev = data;
359         of_node_put(np);
360         if (err == 0)
361                 return -ENODEV;
362         if (err < 0)
363                 return err;
364
365         if (dev_is_pci(dev)) {
366                 /* "mmu-masters" assumes Stream ID == Requester ID */
367                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
368                                        &pci_sid);
369                 it.cur = &pci_sid;
370                 it.cur_count = 1;
371         }
372
373         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
374                                 &arm_smmu_ops);
375         if (err)
376                 return err;
377
378         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
379         if (!sids)
380                 return -ENOMEM;
381
382         *smmu = dev_get_drvdata(smmu_dev);
383         of_phandle_iterator_args(&it, sids, it.cur_count);
384         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
385         kfree(sids);
386         return err;
387 }
388
389 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
390 {
391         int idx;
392
393         do {
394                 idx = find_next_zero_bit(map, end, start);
395                 if (idx == end)
396                         return -ENOSPC;
397         } while (test_and_set_bit(idx, map));
398
399         return idx;
400 }
401
402 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
403 {
404         clear_bit(idx, map);
405 }
406
407 /* Wait for any pending TLB invalidations to complete */
408 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
409                                 void __iomem *sync, void __iomem *status)
410 {
411         unsigned int spin_cnt, delay;
412
413         writel_relaxed(QCOM_DUMMY_VAL, sync);
414         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
415                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
416                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
417                                 return;
418                         cpu_relax();
419                 }
420                 udelay(delay);
421         }
422         dev_err_ratelimited(smmu->dev,
423                             "TLB sync timed out -- SMMU may be deadlocked\n");
424 }
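/*
 * Timing sketch: each outer iteration busy-polls the status register
 * TLB_SPIN_COUNT (10) times, then backs off with udelay(delay), the delay
 * doubling 1, 2, 4, ... us up to (but not including) TLB_LOOP_TIMEOUT.
 * The udelay() calls therefore sum to 2^20 - 1 us, roughly the one second
 * advertised by the "1s!" note on TLB_LOOP_TIMEOUT.
 */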
425
426 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
427 {
428         void __iomem *base = ARM_SMMU_GR0(smmu);
429         unsigned long flags;
430
431         spin_lock_irqsave(&smmu->global_sync_lock, flags);
432         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
433                             base + ARM_SMMU_GR0_sTLBGSTATUS);
434         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
435 }
436
437 static void arm_smmu_tlb_sync_context(void *cookie)
438 {
439         struct arm_smmu_domain *smmu_domain = cookie;
440         struct arm_smmu_device *smmu = smmu_domain->smmu;
441         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
442         unsigned long flags;
443
444         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
445         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
446                             base + ARM_SMMU_CB_TLBSTATUS);
447         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
448 }
449
450 static void arm_smmu_tlb_sync_vmid(void *cookie)
451 {
452         struct arm_smmu_domain *smmu_domain = cookie;
453
454         arm_smmu_tlb_sync_global(smmu_domain->smmu);
455 }
456
457 static void arm_smmu_tlb_inv_context_s1(void *cookie)
458 {
459         struct arm_smmu_domain *smmu_domain = cookie;
460         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
461         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
462
463         /*
464          * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
465          * cleared by the current CPU are visible to the SMMU before the TLBI.
466          */
467         writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
468         arm_smmu_tlb_sync_context(cookie);
469 }
470
471 static void arm_smmu_tlb_inv_context_s2(void *cookie)
472 {
473         struct arm_smmu_domain *smmu_domain = cookie;
474         struct arm_smmu_device *smmu = smmu_domain->smmu;
475         void __iomem *base = ARM_SMMU_GR0(smmu);
476
477         /* NOTE: see above */
478         writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
479         arm_smmu_tlb_sync_global(smmu);
480 }
481
482 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
483                                       size_t granule, bool leaf, void *cookie)
484 {
485         struct arm_smmu_domain *smmu_domain = cookie;
486         struct arm_smmu_device *smmu = smmu_domain->smmu;
487         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
488         void __iomem *reg = ARM_SMMU_CB(smmu, cfg->cbndx);
489
490         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
491                 wmb();
492
493         reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
494
495         if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
496                 iova = (iova >> 12) << 12;
497                 iova |= cfg->asid;
498                 do {
499                         writel_relaxed(iova, reg);
500                         iova += granule;
501                 } while (size -= granule);
502         } else {
503                 iova >>= 12;
504                 iova |= (u64)cfg->asid << 48;
505                 do {
506                         writeq_relaxed(iova, reg);
507                         iova += granule >> 12;
508                 } while (size -= granule);
509         }
510 }
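/*
 * Encoding example (values purely illustrative): for an AArch64-format
 * context with asid = 5, invalidating iova = 0x12345000 writes
 *
 *     (0x12345000 >> 12) | ((u64)5 << 48) = 0x0005000000012345
 *
 * to TLBIVA/TLBIVAL, whereas a 32-bit format context writes
 *
 *     (0x12345000 & ~0xfffUL) | 5 = 0x12345005.
 */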
511
512 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
513                                       size_t granule, bool leaf, void *cookie)
514 {
515         struct arm_smmu_domain *smmu_domain = cookie;
516         struct arm_smmu_device *smmu = smmu_domain->smmu;
517         void __iomem *reg = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
518
519         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
520                 wmb();
521
522         reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
523         iova >>= 12;
524         do {
525                 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
526                         writeq_relaxed(iova, reg);
527                 else
528                         writel_relaxed(iova, reg);
529                 iova += granule >> 12;
530         } while (size -= granule);
531 }
532
533 /*
534  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
535  * almost negligible, but the benefit of getting the first one in as far ahead
536  * of the sync as possible is significant, hence we don't just make this a
537  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
538  */
539 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
540                                          size_t granule, bool leaf, void *cookie)
541 {
542         struct arm_smmu_domain *smmu_domain = cookie;
543         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
544
545         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
546                 wmb();
547
548         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
549 }
550
551 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
552         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
553         .tlb_add_flush  = arm_smmu_tlb_inv_range_s1,
554         .tlb_sync       = arm_smmu_tlb_sync_context,
555 };
556
557 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
558         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
559         .tlb_add_flush  = arm_smmu_tlb_inv_range_s2,
560         .tlb_sync       = arm_smmu_tlb_sync_context,
561 };
562
563 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
564         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
565         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
566         .tlb_sync       = arm_smmu_tlb_sync_vmid,
567 };
568
569 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
570 {
571         u32 fsr, fsynr, cbfrsynra;
572         unsigned long iova;
573         struct iommu_domain *domain = dev;
574         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
575         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
576         struct arm_smmu_device *smmu = smmu_domain->smmu;
577         void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
578         void __iomem *cb_base;
579
580         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
581         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
582
583         if (!(fsr & FSR_FAULT))
584                 return IRQ_NONE;
585
586         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
587         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
588         cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
589
590         dev_err_ratelimited(smmu->dev,
591         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
592                             fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
593
594         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
595         return IRQ_HANDLED;
596 }
597
598 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
599 {
600         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
601         struct arm_smmu_device *smmu = dev;
602         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
603
604         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
605         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
606         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
607         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
608
609         if (!gfsr)
610                 return IRQ_NONE;
611
612         dev_err_ratelimited(smmu->dev,
613                 "Unexpected global fault, this could be serious\n");
614         dev_err_ratelimited(smmu->dev,
615                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
616                 gfsr, gfsynr0, gfsynr1, gfsynr2);
617
618         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
619         return IRQ_HANDLED;
620 }
621
622 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
623                                        struct io_pgtable_cfg *pgtbl_cfg)
624 {
625         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
626         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
627         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
628
629         cb->cfg = cfg;
630
631         /* TCR */
632         if (stage1) {
633                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
634                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
635                 } else {
636                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
637                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
638                         cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
639                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
640                                 cb->tcr[1] |= TCR2_AS;
641                 }
642         } else {
643                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
644         }
645
646         /* TTBRs */
647         if (stage1) {
648                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
649                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
650                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
651                 } else {
652                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
653                         cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
654                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
655                         cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
656                 }
657         } else {
658                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
659         }
660
661         /* MAIRs (stage-1 only) */
662         if (stage1) {
663                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
664                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
665                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
666                 } else {
667                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
668                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
669                 }
670         }
671 }
672
673 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
674 {
675         u32 reg;
676         bool stage1;
677         struct arm_smmu_cb *cb = &smmu->cbs[idx];
678         struct arm_smmu_cfg *cfg = cb->cfg;
679         void __iomem *cb_base, *gr1_base;
680
681         cb_base = ARM_SMMU_CB(smmu, idx);
682
683         /* Unassigned context banks only need disabling */
684         if (!cfg) {
685                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
686                 return;
687         }
688
689         gr1_base = ARM_SMMU_GR1(smmu);
690         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
691
692         /* CBA2R */
693         if (smmu->version > ARM_SMMU_V1) {
694                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
695                         reg = CBA2R_VA64;
696                 else
697                         reg = 0;
698                 /* 16-bit VMIDs live in CBA2R */
699                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
700                         reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
701
702                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
703         }
704
705         /* CBAR */
706         reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
707         if (smmu->version < ARM_SMMU_V2)
708                 reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
709
710         /*
711          * Use the weakest shareability/memory types, so they are
712          * overridden by the ttbcr/pte.
713          */
714         if (stage1) {
715                 reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
716                         FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
717         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
718                 /* 8-bit VMIDs live in CBAR */
719                 reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
720         }
721         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
722
723         /*
724          * TCR
725          * We must write this before the TTBRs, since it determines the
726          * access behaviour of some fields (in particular, ASID[15:8]).
727          */
728         if (stage1 && smmu->version > ARM_SMMU_V1)
729                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TCR2);
730         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TCR);
731
732         /* TTBRs */
733         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
734                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
735                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
736                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
737         } else {
738                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
739                 if (stage1)
740                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
741         }
742
743         /* MAIRs (stage-1 only) */
744         if (stage1) {
745                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
746                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
747         }
748
749         /* SCTLR */
750         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
751         if (stage1)
752                 reg |= SCTLR_S1_ASIDPNE;
753         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
754                 reg |= SCTLR_E;
755
756         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
757 }
758
759 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
760                                         struct arm_smmu_device *smmu)
761 {
762         int irq, start, ret = 0;
763         unsigned long ias, oas;
764         struct io_pgtable_ops *pgtbl_ops;
765         struct io_pgtable_cfg pgtbl_cfg;
766         enum io_pgtable_fmt fmt;
767         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
768         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
769
770         mutex_lock(&smmu_domain->init_mutex);
771         if (smmu_domain->smmu)
772                 goto out_unlock;
773
774         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
775                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
776                 smmu_domain->smmu = smmu;
777                 goto out_unlock;
778         }
779
780         /*
781          * Mapping the requested stage onto what we support is surprisingly
782          * complicated, mainly because the spec allows S1+S2 SMMUs without
783          * support for nested translation. That means we end up with the
784          * following table:
785          *
786          * Requested        Supported        Actual
787          *     S1               N              S1
788          *     S1             S1+S2            S1
789          *     S1               S2             S2
790          *     S1               S1             S1
791          *     N                N              N
792          *     N              S1+S2            S2
793          *     N                S2             S2
794          *     N                S1             S1
795          *
796          * Note that you can't actually request stage-2 mappings.
797          */
798         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
799                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
800         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
801                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
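	/*
	 * Illustration: on hardware that advertises only ARM_SMMU_FEAT_TRANS_S2,
	 * a domain requested as stage 1 leaves this point with
	 * smmu_domain->stage == ARM_SMMU_DOMAIN_S2, i.e. the "S1 / S2 -> S2"
	 * row of the table above.
	 */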
802
803         /*
804          * Choosing a suitable context format is even more fiddly. Until we
805          * grow some way for the caller to express a preference, and/or move
806          * the decision into the io-pgtable code where it arguably belongs,
807          * just aim for the closest thing to the rest of the system, and hope
808          * that the hardware isn't esoteric enough that we can't assume AArch64
809          * support to be a superset of AArch32 support...
810          */
811         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
812                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
813         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
814             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
815             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
816             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
817                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
818         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
819             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
820                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
821                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
822                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
823
824         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
825                 ret = -EINVAL;
826                 goto out_unlock;
827         }
828
829         switch (smmu_domain->stage) {
830         case ARM_SMMU_DOMAIN_S1:
831                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
832                 start = smmu->num_s2_context_banks;
833                 ias = smmu->va_size;
834                 oas = smmu->ipa_size;
835                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
836                         fmt = ARM_64_LPAE_S1;
837                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
838                         fmt = ARM_32_LPAE_S1;
839                         ias = min(ias, 32UL);
840                         oas = min(oas, 40UL);
841                 } else {
842                         fmt = ARM_V7S;
843                         ias = min(ias, 32UL);
844                         oas = min(oas, 32UL);
845                 }
846                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
847                 break;
848         case ARM_SMMU_DOMAIN_NESTED:
849                 /*
850                  * We will likely want to change this if/when KVM gets
851                  * involved.
852                  */
853         case ARM_SMMU_DOMAIN_S2:
854                 cfg->cbar = CBAR_TYPE_S2_TRANS;
855                 start = 0;
856                 ias = smmu->ipa_size;
857                 oas = smmu->pa_size;
858                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
859                         fmt = ARM_64_LPAE_S2;
860                 } else {
861                         fmt = ARM_32_LPAE_S2;
862                         ias = min(ias, 40UL);
863                         oas = min(oas, 40UL);
864                 }
865                 if (smmu->version == ARM_SMMU_V2)
866                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
867                 else
868                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
869                 break;
870         default:
871                 ret = -EINVAL;
872                 goto out_unlock;
873         }
874         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
875                                       smmu->num_context_banks);
876         if (ret < 0)
877                 goto out_unlock;
878
879         cfg->cbndx = ret;
880         if (smmu->version < ARM_SMMU_V2) {
881                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
882                 cfg->irptndx %= smmu->num_context_irqs;
883         } else {
884                 cfg->irptndx = cfg->cbndx;
885         }
886
887         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
888                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
889         else
890                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
891
892         pgtbl_cfg = (struct io_pgtable_cfg) {
893                 .pgsize_bitmap  = smmu->pgsize_bitmap,
894                 .ias            = ias,
895                 .oas            = oas,
896                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
897                 .tlb            = smmu_domain->tlb_ops,
898                 .iommu_dev      = smmu->dev,
899         };
900
901         if (smmu_domain->non_strict)
902                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
903
904         smmu_domain->smmu = smmu;
905         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
906         if (!pgtbl_ops) {
907                 ret = -ENOMEM;
908                 goto out_clear_smmu;
909         }
910
911         /* Update the domain's page sizes to reflect the page table format */
912         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
913         domain->geometry.aperture_end = (1UL << ias) - 1;
914         domain->geometry.force_aperture = true;
915
916         /* Initialise the context bank with our page table cfg */
917         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
918         arm_smmu_write_context_bank(smmu, cfg->cbndx);
919
920         /*
921          * Request context fault interrupt. Do this last to avoid the
922          * handler seeing a half-initialised domain state.
923          */
924         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
925         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
926                                IRQF_SHARED, "arm-smmu-context-fault", domain);
927         if (ret < 0) {
928                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
929                         cfg->irptndx, irq);
930                 cfg->irptndx = INVALID_IRPTNDX;
931         }
932
933         mutex_unlock(&smmu_domain->init_mutex);
934
935         /* Publish page table ops for map/unmap */
936         smmu_domain->pgtbl_ops = pgtbl_ops;
937         return 0;
938
939 out_clear_smmu:
940         smmu_domain->smmu = NULL;
941 out_unlock:
942         mutex_unlock(&smmu_domain->init_mutex);
943         return ret;
944 }
945
946 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
947 {
948         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
949         struct arm_smmu_device *smmu = smmu_domain->smmu;
950         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
951         int ret, irq;
952
953         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
954                 return;
955
956         ret = arm_smmu_rpm_get(smmu);
957         if (ret < 0)
958                 return;
959
960         /*
961          * Disable the context bank and free the page tables before freeing
962          * it.
963          */
964         smmu->cbs[cfg->cbndx].cfg = NULL;
965         arm_smmu_write_context_bank(smmu, cfg->cbndx);
966
967         if (cfg->irptndx != INVALID_IRPTNDX) {
968                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
969                 devm_free_irq(smmu->dev, irq, domain);
970         }
971
972         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
973         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
974
975         arm_smmu_rpm_put(smmu);
976 }
977
978 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
979 {
980         struct arm_smmu_domain *smmu_domain;
981
982         if (type != IOMMU_DOMAIN_UNMANAGED &&
983             type != IOMMU_DOMAIN_DMA &&
984             type != IOMMU_DOMAIN_IDENTITY)
985                 return NULL;
986         /*
987          * Allocate the domain and initialise some of its data structures.
988          * We can't really do anything meaningful until we've added a
989          * master.
990          */
991         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
992         if (!smmu_domain)
993                 return NULL;
994
995         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
996             iommu_get_dma_cookie(&smmu_domain->domain))) {
997                 kfree(smmu_domain);
998                 return NULL;
999         }
1000
1001         mutex_init(&smmu_domain->init_mutex);
1002         spin_lock_init(&smmu_domain->cb_lock);
1003
1004         return &smmu_domain->domain;
1005 }
1006
1007 static void arm_smmu_domain_free(struct iommu_domain *domain)
1008 {
1009         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1010
1011         /*
1012          * Free the domain resources. We assume that all devices have
1013          * already been detached.
1014          */
1015         iommu_put_dma_cookie(domain);
1016         arm_smmu_destroy_domain_context(domain);
1017         kfree(smmu_domain);
1018 }
1019
1020 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1021 {
1022         struct arm_smmu_smr *smr = smmu->smrs + idx;
1023         u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
1024
1025         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1026                 reg |= SMR_VALID;
1027         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1028 }
1029
1030 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1031 {
1032         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1033         u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
1034                   FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
1035                   FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
1036
1037         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1038             smmu->smrs[idx].valid)
1039                 reg |= S2CR_EXIDVALID;
1040         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1041 }
1042
1043 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1044 {
1045         arm_smmu_write_s2cr(smmu, idx);
1046         if (smmu->smrs)
1047                 arm_smmu_write_smr(smmu, idx);
1048 }
1049
1050 /*
1051  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1052  * should be called after sCR0 is written.
1053  */
1054 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1055 {
1056         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1057         u32 smr;
1058
1059         if (!smmu->smrs)
1060                 return;
1061
1062         /*
1063          * SMR.ID bits may not be preserved if the corresponding MASK
1064          * bits are set, so check each one separately. We can reject
1065          * masters later if they try to claim IDs outside these masks.
1066          */
1067         smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
1068         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1069         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1070         smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
1071
1072         smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
1073         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1074         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1075         smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
1076 }
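/*
 * Probe example (widths illustrative, not from any particular SoC): if the
 * implementation only wires up 14 of the 16 architectural SMR.ID bits, the
 * all-ones ID pattern written above reads back as 0x3fff, so
 * smmu->streamid_mask becomes 0x3fff and any master claiming a stream ID
 * above that range is later rejected in arm_smmu_add_device().
 */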
1077
1078 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1079 {
1080         struct arm_smmu_smr *smrs = smmu->smrs;
1081         int i, free_idx = -ENOSPC;
1082
1083         /* Stream indexing is blissfully easy */
1084         if (!smrs)
1085                 return id;
1086
1087         /* Validating SMRs is... less so */
1088         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1089                 if (!smrs[i].valid) {
1090                         /*
1091                          * Note the first free entry we come across, which
1092                          * we'll claim in the end if nothing else matches.
1093                          */
1094                         if (free_idx < 0)
1095                                 free_idx = i;
1096                         continue;
1097                 }
1098                 /*
1099                  * If the new entry is _entirely_ matched by an existing entry,
1100                  * then reuse that, with the guarantee that there also cannot
1101                  * be any subsequent conflicting entries. In normal use we'd
1102                  * expect simply identical entries for this case, but there's
1103                  * no harm in accommodating the generalisation.
1104                  */
1105                 if ((mask & smrs[i].mask) == mask &&
1106                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1107                         return i;
1108                 /*
1109                  * If the new entry has any other overlap with an existing one,
1110                  * though, then there always exists at least one stream ID
1111                  * which would cause a conflict, and we can't allow that risk.
1112                  */
1113                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1114                         return -EINVAL;
1115         }
1116
1117         return free_idx;
1118 }
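/*
 * Worked example for the matching rules above (IDs/masks are made up): an
 * existing valid SMR of { id = 0x400, mask = 0x00ff } matches stream IDs
 * 0x400-0x4ff. A new request of { id = 0x410, mask = 0x000f } is entirely
 * contained within it ((0x000f & 0x00ff) == 0x000f, and 0x410 ^ 0x400 has
 * no bits outside 0x00ff), so the existing index is reused. A request of
 * { id = 0x4f0, mask = 0x0f00 } only partially overlaps (it would also
 * match e.g. 0x5f0), fails both tests and is rejected with -EINVAL.
 */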
1119
1120 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1121 {
1122         if (--smmu->s2crs[idx].count)
1123                 return false;
1124
1125         smmu->s2crs[idx] = s2cr_init_val;
1126         if (smmu->smrs)
1127                 smmu->smrs[idx].valid = false;
1128
1129         return true;
1130 }
1131
1132 static int arm_smmu_master_alloc_smes(struct device *dev)
1133 {
1134         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1135         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1136         struct arm_smmu_device *smmu = cfg->smmu;
1137         struct arm_smmu_smr *smrs = smmu->smrs;
1138         struct iommu_group *group;
1139         int i, idx, ret;
1140
1141         mutex_lock(&smmu->stream_map_mutex);
1142         /* Figure out a viable stream map entry allocation */
1143         for_each_cfg_sme(fwspec, i, idx) {
1144                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1145                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1146
1147                 if (idx != INVALID_SMENDX) {
1148                         ret = -EEXIST;
1149                         goto out_err;
1150                 }
1151
1152                 ret = arm_smmu_find_sme(smmu, sid, mask);
1153                 if (ret < 0)
1154                         goto out_err;
1155
1156                 idx = ret;
1157                 if (smrs && smmu->s2crs[idx].count == 0) {
1158                         smrs[idx].id = sid;
1159                         smrs[idx].mask = mask;
1160                         smrs[idx].valid = true;
1161                 }
1162                 smmu->s2crs[idx].count++;
1163                 cfg->smendx[i] = (s16)idx;
1164         }
1165
1166         group = iommu_group_get_for_dev(dev);
1167         if (!group)
1168                 group = ERR_PTR(-ENOMEM);
1169         if (IS_ERR(group)) {
1170                 ret = PTR_ERR(group);
1171                 goto out_err;
1172         }
1173         iommu_group_put(group);
1174
1175         /* It worked! Now, poke the actual hardware */
1176         for_each_cfg_sme(fwspec, i, idx) {
1177                 arm_smmu_write_sme(smmu, idx);
1178                 smmu->s2crs[idx].group = group;
1179         }
1180
1181         mutex_unlock(&smmu->stream_map_mutex);
1182         return 0;
1183
1184 out_err:
1185         while (i--) {
1186                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1187                 cfg->smendx[i] = INVALID_SMENDX;
1188         }
1189         mutex_unlock(&smmu->stream_map_mutex);
1190         return ret;
1191 }
1192
1193 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1194 {
1195         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1196         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1197         int i, idx;
1198
1199         mutex_lock(&smmu->stream_map_mutex);
1200         for_each_cfg_sme(fwspec, i, idx) {
1201                 if (arm_smmu_free_sme(smmu, idx))
1202                         arm_smmu_write_sme(smmu, idx);
1203                 cfg->smendx[i] = INVALID_SMENDX;
1204         }
1205         mutex_unlock(&smmu->stream_map_mutex);
1206 }
1207
1208 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1209                                       struct iommu_fwspec *fwspec)
1210 {
1211         struct arm_smmu_device *smmu = smmu_domain->smmu;
1212         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1213         u8 cbndx = smmu_domain->cfg.cbndx;
1214         enum arm_smmu_s2cr_type type;
1215         int i, idx;
1216
1217         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1218                 type = S2CR_TYPE_BYPASS;
1219         else
1220                 type = S2CR_TYPE_TRANS;
1221
1222         for_each_cfg_sme(fwspec, i, idx) {
1223                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1224                         continue;
1225
1226                 s2cr[idx].type = type;
1227                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1228                 s2cr[idx].cbndx = cbndx;
1229                 arm_smmu_write_s2cr(smmu, idx);
1230         }
1231         return 0;
1232 }
1233
1234 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1235 {
1236         int ret;
1237         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1238         struct arm_smmu_device *smmu;
1239         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1240
1241         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1242                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1243                 return -ENXIO;
1244         }
1245
1246         /*
1247          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1248          * domains between of_xlate() and add_device() - we have no way to cope
1249          * with that, so until ARM gets converted to rely on groups and default
1250          * domains, just say no (but more politely than by dereferencing NULL).
1251          * This should be at least a WARN_ON once that's sorted.
1252          */
1253         if (!fwspec->iommu_priv)
1254                 return -ENODEV;
1255
1256         smmu = fwspec_smmu(fwspec);
1257
1258         ret = arm_smmu_rpm_get(smmu);
1259         if (ret < 0)
1260                 return ret;
1261
1262         /* Ensure that the domain is finalised */
1263         ret = arm_smmu_init_domain_context(domain, smmu);
1264         if (ret < 0)
1265                 goto rpm_put;
1266
1267         /*
1268          * Sanity check the domain. We don't support domains across
1269          * different SMMUs.
1270          */
1271         if (smmu_domain->smmu != smmu) {
1272                 dev_err(dev,
1273                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1274                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1275                 ret = -EINVAL;
1276                 goto rpm_put;
1277         }
1278
1279         /* Looks ok, so add the device to the domain */
1280         ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1281
1282 rpm_put:
1283         arm_smmu_rpm_put(smmu);
1284         return ret;
1285 }
1286
1287 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1288                         phys_addr_t paddr, size_t size, int prot)
1289 {
1290         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1291         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1292         int ret;
1293
1294         if (!ops)
1295                 return -ENODEV;
1296
1297         arm_smmu_rpm_get(smmu);
1298         ret = ops->map(ops, iova, paddr, size, prot);
1299         arm_smmu_rpm_put(smmu);
1300
1301         return ret;
1302 }
1303
1304 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1305                              size_t size)
1306 {
1307         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1308         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1309         size_t ret;
1310
1311         if (!ops)
1312                 return 0;
1313
1314         arm_smmu_rpm_get(smmu);
1315         ret = ops->unmap(ops, iova, size);
1316         arm_smmu_rpm_put(smmu);
1317
1318         return ret;
1319 }
1320
1321 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1322 {
1323         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1324         struct arm_smmu_device *smmu = smmu_domain->smmu;
1325
1326         if (smmu_domain->tlb_ops) {
1327                 arm_smmu_rpm_get(smmu);
1328                 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1329                 arm_smmu_rpm_put(smmu);
1330         }
1331 }
1332
1333 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1334 {
1335         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1336         struct arm_smmu_device *smmu = smmu_domain->smmu;
1337
1338         if (smmu_domain->tlb_ops) {
1339                 arm_smmu_rpm_get(smmu);
1340                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1341                 arm_smmu_rpm_put(smmu);
1342         }
1343 }
1344
1345 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1346                                               dma_addr_t iova)
1347 {
1348         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1349         struct arm_smmu_device *smmu = smmu_domain->smmu;
1350         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1351         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1352         struct device *dev = smmu->dev;
1353         void __iomem *cb_base;
1354         u32 tmp;
1355         u64 phys;
1356         unsigned long va, flags;
1357         int ret;
1358
1359         ret = arm_smmu_rpm_get(smmu);
1360         if (ret < 0)
1361                 return 0;
1362
1363         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1364
1365         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1366         va = iova & ~0xfffUL;
1367         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1368                 writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1369         else
1370                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1371
1372         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1373                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1374                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1375                 dev_err(dev,
1376                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1377                         &iova);
                arm_smmu_rpm_put(smmu); /* balance arm_smmu_rpm_get() on this early return */
1378                 return ops->iova_to_phys(ops, iova);
1379         }
1380
1381         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1382         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1383         if (phys & CB_PAR_F) {
1384                 dev_err(dev, "translation fault!\n");
1385                 dev_err(dev, "PAR = 0x%llx\n", phys);
                arm_smmu_rpm_put(smmu); /* balance arm_smmu_rpm_get() on this early return */
1386                 return 0;
1387         }
1388
1389         arm_smmu_rpm_put(smmu);
1390
1391         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1392 }
1393
1394 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1395                                         dma_addr_t iova)
1396 {
1397         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1398         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1399
1400         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1401                 return iova;
1402
1403         if (!ops)
1404                 return 0;
1405
1406         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1407                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1408                 return arm_smmu_iova_to_phys_hard(domain, iova);
1409
1410         return ops->iova_to_phys(ops, iova);
1411 }
1412
1413 static bool arm_smmu_capable(enum iommu_cap cap)
1414 {
1415         switch (cap) {
1416         case IOMMU_CAP_CACHE_COHERENCY:
1417                 /*
1418                  * Return true here as the SMMU can always send out coherent
1419                  * requests.
1420                  */
1421                 return true;
1422         case IOMMU_CAP_NOEXEC:
1423                 return true;
1424         default:
1425                 return false;
1426         }
1427 }
1428
1429 static int arm_smmu_match_node(struct device *dev, const void *data)
1430 {
1431         return dev->fwnode == data;
1432 }
1433
1434 static
1435 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1436 {
1437         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1438                                                 fwnode, arm_smmu_match_node);
1439         put_device(dev);
1440         return dev ? dev_get_drvdata(dev) : NULL;
1441 }
1442
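     /*
      * Hook up a newly-discovered master: resolve its SMMU via the legacy
      * "mmu-masters" binding or the generic fwspec, validate its Stream IDs
      * and SMR masks against the hardware limits, then allocate stream
      * mapping entries and a runtime PM link to the SMMU.
      */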
1443 static int arm_smmu_add_device(struct device *dev)
1444 {
1445         struct arm_smmu_device *smmu;
1446         struct arm_smmu_master_cfg *cfg;
1447         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1448         int i, ret;
1449
1450         if (using_legacy_binding) {
1451                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1452
1453                 /*
1454                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1455                  * will allocate/initialise a new one. Thus we need to update fwspec for
1456                  * later use.
1457                  */
1458                 fwspec = dev_iommu_fwspec_get(dev);
1459                 if (ret)
1460                         goto out_free;
1461         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1462                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1463         } else {
1464                 return -ENODEV;
1465         }
1466
1467         ret = -EINVAL;
1468         for (i = 0; i < fwspec->num_ids; i++) {
1469                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1470                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1471
1472                 if (sid & ~smmu->streamid_mask) {
1473                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1474                                 sid, smmu->streamid_mask);
1475                         goto out_free;
1476                 }
1477                 if (mask & ~smmu->smr_mask_mask) {
1478                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1479                                 mask, smmu->smr_mask_mask);
1480                         goto out_free;
1481                 }
1482         }
1483
1484         ret = -ENOMEM;
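             /*
              * cfg carries one smendx slot per stream ID in the fwspec, so
              * size the allocation with offsetof() using the loop count above.
              */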
1485         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1486                       GFP_KERNEL);
1487         if (!cfg)
1488                 goto out_free;
1489
1490         cfg->smmu = smmu;
1491         fwspec->iommu_priv = cfg;
1492         while (i--)
1493                 cfg->smendx[i] = INVALID_SMENDX;
1494
1495         ret = arm_smmu_rpm_get(smmu);
1496         if (ret < 0)
1497                 goto out_cfg_free;
1498
1499         ret = arm_smmu_master_alloc_smes(dev);
1500         arm_smmu_rpm_put(smmu);
1501
1502         if (ret)
1503                 goto out_cfg_free;
1504
1505         iommu_device_link(&smmu->iommu, dev);
1506
1507         device_link_add(dev, smmu->dev,
1508                         DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1509
1510         return 0;
1511
1512 out_cfg_free:
1513         kfree(cfg);
1514 out_free:
1515         iommu_fwspec_free(dev);
1516         return ret;
1517 }
1518
1519 static void arm_smmu_remove_device(struct device *dev)
1520 {
1521         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1522         struct arm_smmu_master_cfg *cfg;
1523         struct arm_smmu_device *smmu;
1524         int ret;
1525
1526         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1527                 return;
1528
1529         cfg  = fwspec->iommu_priv;
1530         smmu = cfg->smmu;
1531
1532         ret = arm_smmu_rpm_get(smmu);
1533         if (ret < 0)
1534                 return;
1535
1536         iommu_device_unlink(&smmu->iommu, dev);
1537         arm_smmu_master_free_smes(fwspec);
1538
1539         arm_smmu_rpm_put(smmu);
1540
1541         iommu_group_remove_device(dev);
1542         kfree(fwspec->iommu_priv);
1543         iommu_fwspec_free(dev);
1544 }
1545
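     /*
      * Reuse the group of any stream mapping entry this device shares with a
      * previously-added master (stream ID aliasing); conflicting groups are
      * rejected, and a fresh bus-appropriate group is allocated otherwise.
      */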
1546 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1547 {
1548         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1549         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1550         struct iommu_group *group = NULL;
1551         int i, idx;
1552
1553         for_each_cfg_sme(fwspec, i, idx) {
1554                 if (group && smmu->s2crs[idx].group &&
1555                     group != smmu->s2crs[idx].group)
1556                         return ERR_PTR(-EINVAL);
1557
1558                 group = smmu->s2crs[idx].group;
1559         }
1560
1561         if (group)
1562                 return iommu_group_ref_get(group);
1563
1564         if (dev_is_pci(dev))
1565                 group = pci_device_group(dev);
1566         else if (dev_is_fsl_mc(dev))
1567                 group = fsl_mc_device_group(dev);
1568         else
1569                 group = generic_device_group(dev);
1570
1571         return group;
1572 }
1573
1574 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1575                                     enum iommu_attr attr, void *data)
1576 {
1577         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1578
1579         switch (domain->type) {
1580         case IOMMU_DOMAIN_UNMANAGED:
1581                 switch (attr) {
1582                 case DOMAIN_ATTR_NESTING:
1583                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1584                         return 0;
1585                 default:
1586                         return -ENODEV;
1587                 }
1588                 break;
1589         case IOMMU_DOMAIN_DMA:
1590                 switch (attr) {
1591                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1592                         *(int *)data = smmu_domain->non_strict;
1593                         return 0;
1594                 default:
1595                         return -ENODEV;
1596                 }
1597                 break;
1598         default:
1599                 return -EINVAL;
1600         }
1601 }
1602
1603 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1604                                     enum iommu_attr attr, void *data)
1605 {
1606         int ret = 0;
1607         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1608
1609         mutex_lock(&smmu_domain->init_mutex);
1610
1611         switch (domain->type) {
1612         case IOMMU_DOMAIN_UNMANAGED:
1613                 switch (attr) {
1614                 case DOMAIN_ATTR_NESTING:
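                             /*
                              * Nesting can only be selected before the
                              * domain has been attached to (and hence
                              * finalised on) an SMMU.
                              */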
1615                         if (smmu_domain->smmu) {
1616                                 ret = -EPERM;
1617                                 goto out_unlock;
1618                         }
1619
1620                         if (*(int *)data)
1621                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1622                         else
1623                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1624                         break;
1625                 default:
1626                         ret = -ENODEV;
1627                 }
1628                 break;
1629         case IOMMU_DOMAIN_DMA:
1630                 switch (attr) {
1631                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1632                         smmu_domain->non_strict = *(int *)data;
1633                         break;
1634                 default:
1635                         ret = -ENODEV;
1636                 }
1637                 break;
1638         default:
1639                 ret = -EINVAL;
1640         }
1641 out_unlock:
1642         mutex_unlock(&smmu_domain->init_mutex);
1643         return ret;
1644 }
1645
1646 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1647 {
1648         u32 mask, fwid = 0;
1649
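             /*
              * Pack the Stream ID (cell 0) and, if present, the SMR mask
              * (cell 1, or the "stream-match-mask" property) into a single
              * firmware ID for the fwspec.
              */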
1650         if (args->args_count > 0)
1651                 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1652
1653         if (args->args_count > 1)
1654                 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1655         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1656                 fwid |= FIELD_PREP(SMR_MASK, mask);
1657
1658         return iommu_fwspec_add_ids(dev, &fwid, 1);
1659 }
1660
1661 static void arm_smmu_get_resv_regions(struct device *dev,
1662                                       struct list_head *head)
1663 {
1664         struct iommu_resv_region *region;
1665         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1666
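             /*
              * Advertise the software-managed MSI window so that iommu-dma
              * can map MSI doorbells for devices behind this SMMU.
              */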
1667         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1668                                          prot, IOMMU_RESV_SW_MSI);
1669         if (!region)
1670                 return;
1671
1672         list_add_tail(&region->list, head);
1673
1674         iommu_dma_get_resv_regions(dev, head);
1675 }
1676
1677 static void arm_smmu_put_resv_regions(struct device *dev,
1678                                       struct list_head *head)
1679 {
1680         struct iommu_resv_region *entry, *next;
1681
1682         list_for_each_entry_safe(entry, next, head, list)
1683                 kfree(entry);
1684 }
1685
1686 static struct iommu_ops arm_smmu_ops = {
1687         .capable                = arm_smmu_capable,
1688         .domain_alloc           = arm_smmu_domain_alloc,
1689         .domain_free            = arm_smmu_domain_free,
1690         .attach_dev             = arm_smmu_attach_dev,
1691         .map                    = arm_smmu_map,
1692         .unmap                  = arm_smmu_unmap,
1693         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
1694         .iotlb_sync             = arm_smmu_iotlb_sync,
1695         .iova_to_phys           = arm_smmu_iova_to_phys,
1696         .add_device             = arm_smmu_add_device,
1697         .remove_device          = arm_smmu_remove_device,
1698         .device_group           = arm_smmu_device_group,
1699         .domain_get_attr        = arm_smmu_domain_get_attr,
1700         .domain_set_attr        = arm_smmu_domain_set_attr,
1701         .of_xlate               = arm_smmu_of_xlate,
1702         .get_resv_regions       = arm_smmu_get_resv_regions,
1703         .put_resv_regions       = arm_smmu_put_resv_regions,
1704         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1705 };
1706
1707 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1708 {
1709         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1710         int i;
1711         u32 reg, major;
1712
1713         /* clear global FSR */
1714         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1715         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1716
1717         /*
1718          * Reset stream mapping groups: Initial values mark all SMRn as
1719          * invalid and all S2CRn as bypass unless overridden.
1720          */
1721         for (i = 0; i < smmu->num_mapping_groups; ++i)
1722                 arm_smmu_write_sme(smmu, i);
1723
1724         if (smmu->model == ARM_MMU500) {
1725                 /*
1726                  * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1727                  * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1728                  * bit is only present in MMU-500r2 onwards.
1729                  */
1730                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1731                 major = FIELD_GET(ID7_MAJOR, reg);
1732                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1733                 if (major >= 2)
1734                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1735                 /*
1736                  * Allow unmatched Stream IDs to allocate bypass
1737                  * TLB entries for reduced latency.
1738                  */
1739                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1740                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1741         }
1742
1743         /* Make sure all context banks are disabled and clear CB_FSR  */
1744         for (i = 0; i < smmu->num_context_banks; ++i) {
1745                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1746
1747                 arm_smmu_write_context_bank(smmu, i);
1748                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1749                 /*
1750                  * Disable MMU-500's not-particularly-beneficial next-page
1751                  * prefetcher for the sake of errata #841119 and #826419.
1752                  */
1753                 if (smmu->model == ARM_MMU500) {
1754                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1755                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1756                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1757                 }
1758         }
1759
1760         /* Invalidate the TLB, just in case */
1761         writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1762         writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1763
1764         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1765
1766         /* Enable fault reporting */
1767         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1768
1769         /* Disable TLB broadcasting. */
1770         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1771
1772         /* Enable client access, handling unmatched streams as appropriate */
1773         reg &= ~sCR0_CLIENTPD;
1774         if (disable_bypass)
1775                 reg |= sCR0_USFCFG;
1776         else
1777                 reg &= ~sCR0_USFCFG;
1778
1779         /* Disable forced broadcasting */
1780         reg &= ~sCR0_FB;
1781
1782         /* Don't upgrade barriers */
1783         reg &= ~(sCR0_BSU);
1784
1785         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1786                 reg |= sCR0_VMID16EN;
1787
1788         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1789                 reg |= sCR0_EXIDENABLE;
1790
1791         /* Push the button */
1792         arm_smmu_tlb_sync_global(smmu);
1793         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1794 }
1795
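     /* Decode the IAS/OAS/UBS size encodings from ID2 into a width in bits */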
1796 static int arm_smmu_id_size_to_bits(int size)
1797 {
1798         switch (size) {
1799         case 0:
1800                 return 32;
1801         case 1:
1802                 return 36;
1803         case 2:
1804                 return 40;
1805         case 3:
1806                 return 42;
1807         case 4:
1808                 return 44;
1809         case 5:
1810         default:
1811                 return 48;
1812         }
1813 }
1814
1815 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1816 {
1817         unsigned int size;
1818         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1819         u32 id;
1820         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1821         int i;
1822
1823         dev_notice(smmu->dev, "probing hardware configuration...\n");
1824         dev_notice(smmu->dev, "SMMUv%d with:\n",
1825                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1826
1827         /* ID0 */
1828         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1829
1830         /* Restrict available stages based on module parameter */
1831         if (force_stage == 1)
1832                 id &= ~(ID0_S2TS | ID0_NTS);
1833         else if (force_stage == 2)
1834                 id &= ~(ID0_S1TS | ID0_NTS);
1835
1836         if (id & ID0_S1TS) {
1837                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1838                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1839         }
1840
1841         if (id & ID0_S2TS) {
1842                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1843                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1844         }
1845
1846         if (id & ID0_NTS) {
1847                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1848                 dev_notice(smmu->dev, "\tnested translation\n");
1849         }
1850
1851         if (!(smmu->features &
1852                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1853                 dev_err(smmu->dev, "\tno translation support!\n");
1854                 return -ENODEV;
1855         }
1856
1857         if ((id & ID0_S1TS) &&
1858                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1859                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1860                 dev_notice(smmu->dev, "\taddress translation ops\n");
1861         }
1862
1863         /*
1864          * In order for DMA API calls to work properly, we must defer to what
1865          * the FW says about coherency, regardless of what the hardware claims.
1866          * Fortunately, this also opens up a workaround for systems where the
1867          * ID register value has ended up configured incorrectly.
1868          */
1869         cttw_reg = !!(id & ID0_CTTW);
1870         if (cttw_fw || cttw_reg)
1871                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1872                            cttw_fw ? "" : "non-");
1873         if (cttw_fw != cttw_reg)
1874                 dev_notice(smmu->dev,
1875                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1876
1877         /* Max. number of entries we have for stream matching/indexing */
1878         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1879                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1880                 size = 1 << 16;
1881         } else {
1882                 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1883         }
1884         smmu->streamid_mask = size - 1;
1885         if (id & ID0_SMS) {
1886                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1887                 size = FIELD_GET(ID0_NUMSMRG, id);
1888                 if (size == 0) {
1889                         dev_err(smmu->dev,
1890                                 "stream-matching supported, but no SMRs present!\n");
1891                         return -ENODEV;
1892                 }
1893
1894                 /* Zero-initialised to mark as invalid */
1895                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1896                                           GFP_KERNEL);
1897                 if (!smmu->smrs)
1898                         return -ENOMEM;
1899
1900                 dev_notice(smmu->dev,
1901                            "\tstream matching with %u register groups\n", size);
1902         }
1903         /* s2cr->type == 0 means translation, so initialise explicitly */
1904         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1905                                          GFP_KERNEL);
1906         if (!smmu->s2crs)
1907                 return -ENOMEM;
1908         for (i = 0; i < size; i++)
1909                 smmu->s2crs[i] = s2cr_init_val;
1910
1911         smmu->num_mapping_groups = size;
1912         mutex_init(&smmu->stream_map_mutex);
1913         spin_lock_init(&smmu->global_sync_lock);
1914
1915         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1916                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1917                 if (!(id & ID0_PTFS_NO_AARCH32S))
1918                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1919         }
1920
1921         /* ID1 */
1922         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1923         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1924
1925         /* Check for size mismatch of SMMU address space from mapped region */
1926         size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1927         if (smmu->numpage != 2 * size << smmu->pgshift)
1928                 dev_warn(smmu->dev,
1929                         "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1930                         2 * size << smmu->pgshift, smmu->numpage);
1931         /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1932         smmu->numpage = size;
1933
1934         smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1935         smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1936         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1937                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1938                 return -ENODEV;
1939         }
1940         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1941                    smmu->num_context_banks, smmu->num_s2_context_banks);
1942         /*
1943          * Cavium CN88xx erratum #27704.
1944          * Ensure ASID and VMID allocation is unique across all SMMUs in
1945          * the system.
1946          */
1947         if (smmu->model == CAVIUM_SMMUV2) {
1948                 smmu->cavium_id_base =
1949                         atomic_add_return(smmu->num_context_banks,
1950                                           &cavium_smmu_context_count);
1951                 smmu->cavium_id_base -= smmu->num_context_banks;
1952                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1953         }
1954         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1955                                  sizeof(*smmu->cbs), GFP_KERNEL);
1956         if (!smmu->cbs)
1957                 return -ENOMEM;
1958
1959         /* ID2 */
1960         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1961         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
1962         smmu->ipa_size = size;
1963
1964         /* The output mask is also applied for bypass */
1965         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
1966         smmu->pa_size = size;
1967
1968         if (id & ID2_VMID16)
1969                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1970
1971         /*
1972          * What the page table walker can address actually depends on which
1973          * descriptor format is in use, but since a) we don't know that yet,
1974          * and b) it can vary per context bank, this will have to do...
1975          */
1976         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1977                 dev_warn(smmu->dev,
1978                          "failed to set DMA mask for table walker\n");
1979
1980         if (smmu->version < ARM_SMMU_V2) {
1981                 smmu->va_size = smmu->ipa_size;
1982                 if (smmu->version == ARM_SMMU_V1_64K)
1983                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1984         } else {
1985                 size = FIELD_GET(ID2_UBS, id);
1986                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1987                 if (id & ID2_PTFS_4K)
1988                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1989                 if (id & ID2_PTFS_16K)
1990                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1991                 if (id & ID2_PTFS_64K)
1992                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1993         }
1994
1995         /* Now we've corralled the various formats, what'll it do? */
1996         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1997                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1998         if (smmu->features &
1999             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
2000                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2001         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
2002                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2003         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
2004                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2005
2006         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2007                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2008         else
2009                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2010         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
2011                    smmu->pgsize_bitmap);
2012
2013
2014         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
2015                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
2016                            smmu->va_size, smmu->ipa_size);
2017
2018         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
2019                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
2020                            smmu->ipa_size, smmu->pa_size);
2021
2022         return 0;
2023 }
2024
2025 struct arm_smmu_match_data {
2026         enum arm_smmu_arch_version version;
2027         enum arm_smmu_implementation model;
2028 };
2029
2030 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
2031 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
2032
2033 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
2034 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
2035 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
2036 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
2037 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
2038 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
2039
2040 static const struct of_device_id arm_smmu_of_match[] = {
2041         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
2042         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
2043         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
2044         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
2045         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
2046         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
2047         { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
2048         { },
2049 };
2050
2051 #ifdef CONFIG_ACPI
2052 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
2053 {
2054         int ret = 0;
2055
2056         switch (model) {
2057         case ACPI_IORT_SMMU_V1:
2058         case ACPI_IORT_SMMU_CORELINK_MMU400:
2059                 smmu->version = ARM_SMMU_V1;
2060                 smmu->model = GENERIC_SMMU;
2061                 break;
2062         case ACPI_IORT_SMMU_CORELINK_MMU401:
2063                 smmu->version = ARM_SMMU_V1_64K;
2064                 smmu->model = GENERIC_SMMU;
2065                 break;
2066         case ACPI_IORT_SMMU_V2:
2067                 smmu->version = ARM_SMMU_V2;
2068                 smmu->model = GENERIC_SMMU;
2069                 break;
2070         case ACPI_IORT_SMMU_CORELINK_MMU500:
2071                 smmu->version = ARM_SMMU_V2;
2072                 smmu->model = ARM_MMU500;
2073                 break;
2074         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
2075                 smmu->version = ARM_SMMU_V2;
2076                 smmu->model = CAVIUM_SMMUV2;
2077                 break;
2078         default:
2079                 ret = -ENODEV;
2080         }
2081
2082         return ret;
2083 }
2084
2085 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2086                                       struct arm_smmu_device *smmu)
2087 {
2088         struct device *dev = smmu->dev;
2089         struct acpi_iort_node *node =
2090                 *(struct acpi_iort_node **)dev_get_platdata(dev);
2091         struct acpi_iort_smmu *iort_smmu;
2092         int ret;
2093
2094         /* Retrieve SMMU1/2 specific data */
2095         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2096
2097         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2098         if (ret < 0)
2099                 return ret;
2100
2101         /* Ignore the configuration access interrupt */
2102         smmu->num_global_irqs = 1;
2103
2104         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2105                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2106
2107         return 0;
2108 }
2109 #else
2110 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2111                                              struct arm_smmu_device *smmu)
2112 {
2113         return -ENODEV;
2114 }
2115 #endif
2116
2117 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2118                                     struct arm_smmu_device *smmu)
2119 {
2120         const struct arm_smmu_match_data *data;
2121         struct device *dev = &pdev->dev;
2122         bool legacy_binding;
2123
2124         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2125                                  &smmu->num_global_irqs)) {
2126                 dev_err(dev, "missing #global-interrupts property\n");
2127                 return -ENODEV;
2128         }
2129
2130         data = of_device_get_match_data(dev);
2131         smmu->version = data->version;
2132         smmu->model = data->model;
2133
2134         parse_driver_options(smmu);
2135
2136         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2137         if (legacy_binding && !using_generic_binding) {
2138                 if (!using_legacy_binding)
2139                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2140                 using_legacy_binding = true;
2141         } else if (!legacy_binding && !using_legacy_binding) {
2142                 using_generic_binding = true;
2143         } else {
2144                 dev_err(dev, "not probing due to mismatched DT properties\n");
2145                 return -ENODEV;
2146         }
2147
2148         if (of_dma_is_coherent(dev->of_node))
2149                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2150
2151         return 0;
2152 }
2153
2154 static void arm_smmu_bus_init(void)
2155 {
2156         /* Oh, for a proper bus abstraction */
2157         if (!iommu_present(&platform_bus_type))
2158                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2159 #ifdef CONFIG_ARM_AMBA
2160         if (!iommu_present(&amba_bustype))
2161                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2162 #endif
2163 #ifdef CONFIG_PCI
2164         if (!iommu_present(&pci_bus_type)) {
2165                 pci_request_acs();
2166                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2167         }
2168 #endif
2169 #ifdef CONFIG_FSL_MC_BUS
2170         if (!iommu_present(&fsl_mc_bus_type))
2171                 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2172 #endif
2173 }
2174
2175 static int arm_smmu_device_probe(struct platform_device *pdev)
2176 {
2177         struct resource *res;
2178         resource_size_t ioaddr;
2179         struct arm_smmu_device *smmu;
2180         struct device *dev = &pdev->dev;
2181         int num_irqs, i, err;
2182
2183         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2184         if (!smmu) {
2185                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2186                 return -ENOMEM;
2187         }
2188         smmu->dev = dev;
2189
2190         if (dev->of_node)
2191                 err = arm_smmu_device_dt_probe(pdev, smmu);
2192         else
2193                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2194
2195         if (err)
2196                 return err;
2197
2198         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2199         ioaddr = res->start;
2200         smmu->base = devm_ioremap_resource(dev, res);
2201         if (IS_ERR(smmu->base))
2202                 return PTR_ERR(smmu->base);
2203         /*
2204          * The resource size should effectively match the value of SMMU_TOP;
2205          * stash that temporarily until we know PAGESIZE to validate it with.
2206          */
2207         smmu->numpage = resource_size(res);
2208
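             /*
              * Count the platform IRQs: the first num_global_irqs are global
              * fault interrupts, anything beyond that is a context bank IRQ.
              */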
2209         num_irqs = 0;
2210         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2211                 num_irqs++;
2212                 if (num_irqs > smmu->num_global_irqs)
2213                         smmu->num_context_irqs++;
2214         }
2215
2216         if (!smmu->num_context_irqs) {
2217                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2218                         num_irqs, smmu->num_global_irqs + 1);
2219                 return -ENODEV;
2220         }
2221
2222         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2223                                   GFP_KERNEL);
2224         if (!smmu->irqs) {
2225                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2226                 return -ENOMEM;
2227         }
2228
2229         for (i = 0; i < num_irqs; ++i) {
2230                 int irq = platform_get_irq(pdev, i);
2231
2232                 if (irq < 0) {
2233                         dev_err(dev, "failed to get irq index %d\n", i);
2234                         return -ENODEV;
2235                 }
2236                 smmu->irqs[i] = irq;
2237         }
2238
2239         err = devm_clk_bulk_get_all(dev, &smmu->clks);
2240         if (err < 0) {
2241                 dev_err(dev, "failed to get clocks %d\n", err);
2242                 return err;
2243         }
2244         smmu->num_clks = err;
2245
2246         err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2247         if (err)
2248                 return err;
2249
2250         err = arm_smmu_device_cfg_probe(smmu);
2251         if (err)
2252                 return err;
2253
2254         if (smmu->version == ARM_SMMU_V2) {
2255                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2256                         dev_err(dev,
2257                               "found only %d context irq(s) but %d required\n",
2258                               smmu->num_context_irqs, smmu->num_context_banks);
2259                         return -ENODEV;
2260                 }
2261
2262                 /* Ignore superfluous interrupts */
2263                 smmu->num_context_irqs = smmu->num_context_banks;
2264         }
2265
2266         for (i = 0; i < smmu->num_global_irqs; ++i) {
2267                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2268                                        arm_smmu_global_fault,
2269                                        IRQF_SHARED,
2270                                        "arm-smmu global fault",
2271                                        smmu);
2272                 if (err) {
2273                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2274                                 i, smmu->irqs[i]);
2275                         return err;
2276                 }
2277         }
2278
2279         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2280                                      "smmu.%pa", &ioaddr);
2281         if (err) {
2282                 dev_err(dev, "Failed to register iommu in sysfs\n");
2283                 return err;
2284         }
2285
2286         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2287         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2288
2289         err = iommu_device_register(&smmu->iommu);
2290         if (err) {
2291                 dev_err(dev, "Failed to register iommu\n");
2292                 return err;
2293         }
2294
2295         platform_set_drvdata(pdev, smmu);
2296         arm_smmu_device_reset(smmu);
2297         arm_smmu_test_smr_masks(smmu);
2298
2299         /*
2300          * We want to avoid touching dev->power.lock in fastpaths unless
2301          * it's really going to do something useful - pm_runtime_enabled()
2302          * can serve as an ideal proxy for that decision. So, conditionally
2303          * enable pm_runtime.
2304          */
2305         if (dev->pm_domain) {
2306                 pm_runtime_set_active(dev);
2307                 pm_runtime_enable(dev);
2308         }
2309
2310         /*
2311          * For ACPI and generic DT bindings, an SMMU will be probed before
2312          * any device which might need it, so we want the bus ops in place
2313          * ready to handle default domain setup as soon as any SMMU exists.
2314          */
2315         if (!using_legacy_binding)
2316                 arm_smmu_bus_init();
2317
2318         return 0;
2319 }
2320
2321 /*
2322  * With the legacy DT binding in play, though, we have no guarantees about
2323  * probe order, but then we're also not doing default domains, so we can
2324  * delay setting bus ops until we're sure every possible SMMU is ready,
2325  * and that way ensure that no add_device() calls get missed.
2326  */
2327 static int arm_smmu_legacy_bus_init(void)
2328 {
2329         if (using_legacy_binding)
2330                 arm_smmu_bus_init();
2331         return 0;
2332 }
2333 device_initcall_sync(arm_smmu_legacy_bus_init);
2334
2335 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2336 {
2337         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2338
2339         if (!smmu)
2340                 return;
2341
2342         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2343                 dev_err(&pdev->dev, "removing device with active domains!\n");
2344
2345         arm_smmu_rpm_get(smmu);
2346         /* Turn the thing off */
2347         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2348         arm_smmu_rpm_put(smmu);
2349
2350         if (pm_runtime_enabled(smmu->dev))
2351                 pm_runtime_force_suspend(smmu->dev);
2352         else
2353                 clk_bulk_disable(smmu->num_clks, smmu->clks);
2354
2355         clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2356 }
2357
2358 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2359 {
2360         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2361         int ret;
2362
2363         ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2364         if (ret)
2365                 return ret;
2366
2367         arm_smmu_device_reset(smmu);
2368
2369         return 0;
2370 }
2371
2372 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2373 {
2374         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2375
2376         clk_bulk_disable(smmu->num_clks, smmu->clks);
2377
2378         return 0;
2379 }
2380
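     /*
      * System sleep hooks: if runtime PM has already powered the SMMU down
      * there is nothing to do, otherwise reuse the runtime suspend/resume
      * paths.
      */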
2381 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2382 {
2383         if (pm_runtime_suspended(dev))
2384                 return 0;
2385
2386         return arm_smmu_runtime_resume(dev);
2387 }
2388
2389 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2390 {
2391         if (pm_runtime_suspended(dev))
2392                 return 0;
2393
2394         return arm_smmu_runtime_suspend(dev);
2395 }
2396
2397 static const struct dev_pm_ops arm_smmu_pm_ops = {
2398         SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2399         SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2400                            arm_smmu_runtime_resume, NULL)
2401 };
2402
2403 static struct platform_driver arm_smmu_driver = {
2404         .driver = {
2405                 .name                   = "arm-smmu",
2406                 .of_match_table         = of_match_ptr(arm_smmu_of_match),
2407                 .pm                     = &arm_smmu_pm_ops,
2408                 .suppress_bind_attrs    = true,
2409         },
2410         .probe  = arm_smmu_device_probe,
2411         .shutdown = arm_smmu_device_shutdown,
2412 };
2413 builtin_platform_driver(arm_smmu_driver);