drivers/iommu/arm-smmu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *      - SMMUv1 and v2 implementations
11  *      - Stream-matching and stream-indexing
12  *      - v7/v8 long-descriptor format
13  *      - Non-secure access to the SMMU
14  *      - Context fault reporting
15  *      - Extended Stream ID (16 bit)
16  */
17
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/init.h>
31 #include <linux/moduleparam.h>
32 #include <linux/of.h>
33 #include <linux/of_address.h>
34 #include <linux/of_device.h>
35 #include <linux/of_iommu.h>
36 #include <linux/pci.h>
37 #include <linux/platform_device.h>
38 #include <linux/pm_runtime.h>
39 #include <linux/slab.h>
40
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
43
44 #include "arm-smmu.h"
45
46 /*
47  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
48  * global register space are still, in fact, using a hypervisor to mediate it
49  * by trapping and emulating register accesses. Sadly, some deployed versions
50  * of said trapping code have bugs wherein they go horribly wrong for stores
51  * using r31 (i.e. XZR/WZR) as the source register.
52  */
53 #define QCOM_DUMMY_VAL -1
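/*
 * The data written to the TLB sync trigger registers is not interpreted by
 * the SMMU, so any value would do; a non-zero dummy is used purely so the
 * compiler never materialises the store from XZR/WZR and trips over the
 * broken trapping code described above.
 */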
54
55 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
56 #define TLB_SPIN_COUNT                  10
57
58 #define MSI_IOVA_BASE                   0x8000000
59 #define MSI_IOVA_LENGTH                 0x100000
60
61 static int force_stage;
62 /*
63  * not really modular, but the easiest way to keep compat with existing
64  * bootargs behaviour is to continue using module_param() here.
65  */
66 module_param(force_stage, int, S_IRUGO);
67 MODULE_PARM_DESC(force_stage,
68         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
69 static bool disable_bypass =
70         IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
71 module_param(disable_bypass, bool, S_IRUGO);
72 MODULE_PARM_DESC(disable_bypass,
73         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
74
75 struct arm_smmu_s2cr {
76         struct iommu_group              *group;
77         int                             count;
78         enum arm_smmu_s2cr_type         type;
79         enum arm_smmu_s2cr_privcfg      privcfg;
80         u8                              cbndx;
81 };
82
83 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
84         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
85 }
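/*
 * Stream map entries not claimed by any master are (re)set to this value
 * (see arm_smmu_free_sme()), so disable_bypass decides whether transactions
 * from unattached devices fault or pass straight through.
 */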
86
87 struct arm_smmu_smr {
88         u16                             mask;
89         u16                             id;
90         bool                            valid;
91 };
92
93 struct arm_smmu_cb {
94         u64                             ttbr[2];
95         u32                             tcr[2];
96         u32                             mair[2];
97         struct arm_smmu_cfg             *cfg;
98 };
99
100 struct arm_smmu_master_cfg {
101         struct arm_smmu_device          *smmu;
102         s16                             smendx[];
103 };
104 #define INVALID_SMENDX                  -1
105 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
106 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
107 #define fwspec_smendx(fw, i) \
108         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
109 #define for_each_cfg_sme(fw, i, idx) \
110         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
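/*
 * Note that for_each_cfg_sme() leans on the comma operator in its loop
 * condition: idx is refreshed from fwspec_smendx() before i is tested on
 * every pass, and fwspec_smendx() safely yields INVALID_SMENDX for the
 * final out-of-range i.
 */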
111
112 static bool using_legacy_binding, using_generic_binding;
113
114 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
115 {
116         if (pm_runtime_enabled(smmu->dev))
117                 return pm_runtime_get_sync(smmu->dev);
118
119         return 0;
120 }
121
122 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
123 {
124         if (pm_runtime_enabled(smmu->dev))
125                 pm_runtime_put(smmu->dev);
126 }
127
128 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
129 {
130         return container_of(dom, struct arm_smmu_domain, domain);
131 }
132
133 static struct device_node *dev_get_dev_node(struct device *dev)
134 {
135         if (dev_is_pci(dev)) {
136                 struct pci_bus *bus = to_pci_dev(dev)->bus;
137
138                 while (!pci_is_root_bus(bus))
139                         bus = bus->parent;
140                 return of_node_get(bus->bridge->parent->of_node);
141         }
142
143         return of_node_get(dev->of_node);
144 }
145
146 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
147 {
148         *((__be32 *)data) = cpu_to_be32(alias);
149         return 0; /* Continue walking */
150 }
151
152 static int __find_legacy_master_phandle(struct device *dev, void *data)
153 {
154         struct of_phandle_iterator *it = *(void **)data;
155         struct device_node *np = it->node;
156         int err;
157
158         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
159                             "#stream-id-cells", -1)
160                 if (it->node == np) {
161                         *(void **)data = dev;
162                         return 1;
163                 }
164         it->node = np;
165         return err == -ENOENT ? 0 : err;
166 }
167
168 static struct platform_driver arm_smmu_driver;
169 static struct iommu_ops arm_smmu_ops;
170
171 static int arm_smmu_register_legacy_master(struct device *dev,
172                                            struct arm_smmu_device **smmu)
173 {
174         struct device *smmu_dev;
175         struct device_node *np;
176         struct of_phandle_iterator it;
177         void *data = &it;
178         u32 *sids;
179         __be32 pci_sid;
180         int err;
181
182         np = dev_get_dev_node(dev);
183         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
184                 of_node_put(np);
185                 return -ENODEV;
186         }
187
188         it.node = np;
189         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
190                                      __find_legacy_master_phandle);
191         smmu_dev = data;
192         of_node_put(np);
193         if (err == 0)
194                 return -ENODEV;
195         if (err < 0)
196                 return err;
197
198         if (dev_is_pci(dev)) {
199                 /* "mmu-masters" assumes Stream ID == Requester ID */
200                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
201                                        &pci_sid);
202                 it.cur = &pci_sid;
203                 it.cur_count = 1;
204         }
205
206         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
207                                 &arm_smmu_ops);
208         if (err)
209                 return err;
210
211         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
212         if (!sids)
213                 return -ENOMEM;
214
215         *smmu = dev_get_drvdata(smmu_dev);
216         of_phandle_iterator_args(&it, sids, it.cur_count);
217         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
218         kfree(sids);
219         return err;
220 }
221
222 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
223 {
224         int idx;
225
226         do {
227                 idx = find_next_zero_bit(map, end, start);
228                 if (idx == end)
229                         return -ENOSPC;
230         } while (test_and_set_bit(idx, map));
231
232         return idx;
233 }
234
235 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
236 {
237         clear_bit(idx, map);
238 }
239
240 /* Wait for any pending TLB invalidations to complete */
241 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
242                                 int sync, int status)
243 {
244         unsigned int spin_cnt, delay;
245         u32 reg;
246
247         arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
248         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
249                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
250                         reg = arm_smmu_readl(smmu, page, status);
251                         if (!(reg & sTLBGSTATUS_GSACTIVE))
252                                 return;
253                         cpu_relax();
254                 }
255                 udelay(delay);
256         }
257         dev_err_ratelimited(smmu->dev,
258                             "TLB sync timed out -- SMMU may be deadlocked\n");
259 }
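/*
 * The polling above spins TLB_SPIN_COUNT times between sleeps, and the
 * udelay() period doubles on each outer pass (1us, 2us, 4us, ...), so the
 * cumulative sleep time before giving up is just over a second, matching
 * the "1s!" note on TLB_LOOP_TIMEOUT.
 */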
260
261 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
262 {
263         unsigned long flags;
264
265         spin_lock_irqsave(&smmu->global_sync_lock, flags);
266         __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
267                             ARM_SMMU_GR0_sTLBGSTATUS);
268         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
269 }
270
271 static void arm_smmu_tlb_sync_context(void *cookie)
272 {
273         struct arm_smmu_domain *smmu_domain = cookie;
274         struct arm_smmu_device *smmu = smmu_domain->smmu;
275         unsigned long flags;
276
277         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
278         __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
279                             ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
280         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
281 }
282
283 static void arm_smmu_tlb_sync_vmid(void *cookie)
284 {
285         struct arm_smmu_domain *smmu_domain = cookie;
286
287         arm_smmu_tlb_sync_global(smmu_domain->smmu);
288 }
289
290 static void arm_smmu_tlb_inv_context_s1(void *cookie)
291 {
292         struct arm_smmu_domain *smmu_domain = cookie;
293         /*
294          * The TLBI write may be relaxed, so ensure that PTEs cleared by the
295          * current CPU are visible beforehand.
296          */
297         wmb();
298         arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
299                           ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
300         arm_smmu_tlb_sync_context(cookie);
301 }
302
303 static void arm_smmu_tlb_inv_context_s2(void *cookie)
304 {
305         struct arm_smmu_domain *smmu_domain = cookie;
306         struct arm_smmu_device *smmu = smmu_domain->smmu;
307
308         /* See above */
309         wmb();
310         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
311         arm_smmu_tlb_sync_global(smmu);
312 }
313
314 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
315                                       size_t granule, bool leaf, void *cookie)
316 {
317         struct arm_smmu_domain *smmu_domain = cookie;
318         struct arm_smmu_device *smmu = smmu_domain->smmu;
319         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
320         int reg, idx = cfg->cbndx;
321
322         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
323                 wmb();
324
325         reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
326
327         if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
328                 iova = (iova >> 12) << 12;
329                 iova |= cfg->asid;
330                 do {
331                         arm_smmu_cb_write(smmu, idx, reg, iova);
332                         iova += granule;
333                 } while (size -= granule);
334         } else {
335                 iova >>= 12;
336                 iova |= (u64)cfg->asid << 48;
337                 do {
338                         arm_smmu_cb_writeq(smmu, idx, reg, iova);
339                         iova += granule >> 12;
340                 } while (size -= granule);
341         }
342 }
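/*
 * Concrete example of the AArch64 encoding above: invalidating
 * iova 0x40001000 in a context with asid 5 writes
 * (0x40001000 >> 12) | (5ULL << 48) == 0x0005000000040001 to the chosen
 * TLBIVA(L) register, and each further write steps by granule >> 12.
 */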
343
344 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
345                                       size_t granule, bool leaf, void *cookie)
346 {
347         struct arm_smmu_domain *smmu_domain = cookie;
348         struct arm_smmu_device *smmu = smmu_domain->smmu;
349         int reg, idx = smmu_domain->cfg.cbndx;
350
351         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
352                 wmb();
353
354         reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
355         iova >>= 12;
356         do {
357                 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
358                         arm_smmu_cb_writeq(smmu, idx, reg, iova);
359                 else
360                         arm_smmu_cb_write(smmu, idx, reg, iova);
361                 iova += granule >> 12;
362         } while (size -= granule);
363 }
364
365 /*
366  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
367  * almost negligible, but the benefit of getting the first one in as far ahead
368  * of the sync as possible is significant, hence we don't just make this a
369  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
370  */
371 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
372                                          size_t granule, bool leaf, void *cookie)
373 {
374         struct arm_smmu_domain *smmu_domain = cookie;
375         struct arm_smmu_device *smmu = smmu_domain->smmu;
376
377         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
378                 wmb();
379
380         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
381 }
382
383 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
384                                   size_t granule, void *cookie)
385 {
386         struct arm_smmu_domain *smmu_domain = cookie;
387         const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
388
389         ops->tlb_inv_range(iova, size, granule, false, cookie);
390         ops->tlb_sync(cookie);
391 }
392
393 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
394                                   size_t granule, void *cookie)
395 {
396         struct arm_smmu_domain *smmu_domain = cookie;
397         const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
398
399         ops->tlb_inv_range(iova, size, granule, true, cookie);
400         ops->tlb_sync(cookie);
401 }
402
403 static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
404                                   unsigned long iova, size_t granule,
405                                   void *cookie)
406 {
407         struct arm_smmu_domain *smmu_domain = cookie;
408         const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
409
410         ops->tlb_inv_range(iova, granule, granule, true, cookie);
411 }
412
413 static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
414         .tlb = {
415                 .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
416                 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
417                 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
418                 .tlb_add_page   = arm_smmu_tlb_add_page,
419         },
420         .tlb_inv_range          = arm_smmu_tlb_inv_range_s1,
421         .tlb_sync               = arm_smmu_tlb_sync_context,
422 };
423
424 static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
425         .tlb = {
426                 .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
427                 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
428                 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
429                 .tlb_add_page   = arm_smmu_tlb_add_page,
430         },
431         .tlb_inv_range          = arm_smmu_tlb_inv_range_s2,
432         .tlb_sync               = arm_smmu_tlb_sync_context,
433 };
434
435 static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
436         .tlb = {
437                 .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
438                 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
439                 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
440                 .tlb_add_page   = arm_smmu_tlb_add_page,
441         },
442         .tlb_inv_range          = arm_smmu_tlb_inv_vmid_nosync,
443         .tlb_sync               = arm_smmu_tlb_sync_vmid,
444 };
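/*
 * To summarise the three configurations above: stage-1 contexts invalidate
 * by ASID or VA within their own context bank and sync via the context
 * bank's TLBSYNC; stage-2 contexts on SMMUv2 do likewise by IPA; stage-2
 * contexts on SMMUv1 parts (e.g. MMU-401) can only invalidate an entire
 * VMID through GR0, so range invalidations degenerate to TLBIVMID followed
 * by a global sync.
 */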
445
446 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
447 {
448         u32 fsr, fsynr, cbfrsynra;
449         unsigned long iova;
450         struct iommu_domain *domain = dev;
451         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
452         struct arm_smmu_device *smmu = smmu_domain->smmu;
453         int idx = smmu_domain->cfg.cbndx;
454
455         fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
456         if (!(fsr & FSR_FAULT))
457                 return IRQ_NONE;
458
459         fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
460         iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
461         cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
462
463         dev_err_ratelimited(smmu->dev,
464         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
465                             fsr, iova, fsynr, cbfrsynra, idx);
466
467         arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
468         return IRQ_HANDLED;
469 }
470
471 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
472 {
473         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
474         struct arm_smmu_device *smmu = dev;
475
476         gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
477         gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
478         gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
479         gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
480
481         if (!gfsr)
482                 return IRQ_NONE;
483
484         dev_err_ratelimited(smmu->dev,
485                 "Unexpected global fault, this could be serious\n");
486         dev_err_ratelimited(smmu->dev,
487                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
488                 gfsr, gfsynr0, gfsynr1, gfsynr2);
489
490         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
491         return IRQ_HANDLED;
492 }
493
494 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
495                                        struct io_pgtable_cfg *pgtbl_cfg)
496 {
497         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
498         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
499         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
500
501         cb->cfg = cfg;
502
503         /* TCR */
504         if (stage1) {
505                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
506                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
507                 } else {
508                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
509                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
510                         cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
511                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
512                                 cb->tcr[1] |= TCR2_AS;
513                 }
514         } else {
515                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
516         }
517
518         /* TTBRs */
519         if (stage1) {
520                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
521                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
522                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
523                 } else {
524                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
525                         cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
526                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
527                         cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
528                 }
529         } else {
530                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
531         }
532
533         /* MAIRs (stage-1 only) */
534         if (stage1) {
535                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
536                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
537                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
538                 } else {
539                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
540                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
541                 }
542         }
543 }
544
545 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
546 {
547         u32 reg;
548         bool stage1;
549         struct arm_smmu_cb *cb = &smmu->cbs[idx];
550         struct arm_smmu_cfg *cfg = cb->cfg;
551
552         /* Unassigned context banks only need disabling */
553         if (!cfg) {
554                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
555                 return;
556         }
557
558         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
559
560         /* CBA2R */
561         if (smmu->version > ARM_SMMU_V1) {
562                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
563                         reg = CBA2R_VA64;
564                 else
565                         reg = 0;
566                 /* 16-bit VMIDs live in CBA2R */
567                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
568                         reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
569
570                 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
571         }
572
573         /* CBAR */
574         reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
575         if (smmu->version < ARM_SMMU_V2)
576                 reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
577
578         /*
579          * Use the weakest shareability/memory types, so they are
580          * overridden by the ttbcr/pte.
581          */
582         if (stage1) {
583                 reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
584                         FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
585         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
586                 /* 8-bit VMIDs live in CBAR */
587                 reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
588         }
589         arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
590
591         /*
592          * TCR
593          * We must write this before the TTBRs, since it determines the
594          * access behaviour of some fields (in particular, ASID[15:8]).
595          */
596         if (stage1 && smmu->version > ARM_SMMU_V1)
597                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
598         arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
599
600         /* TTBRs */
601         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
602                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
603                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
604                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
605         } else {
606                 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
607                 if (stage1)
608                         arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
609                                            cb->ttbr[1]);
610         }
611
612         /* MAIRs (stage-1 only) */
613         if (stage1) {
614                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
615                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
616         }
617
618         /* SCTLR */
619         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
620         if (stage1)
621                 reg |= SCTLR_S1_ASIDPNE;
622         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
623                 reg |= SCTLR_E;
624
625         arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
626 }
627
628 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
629                                         struct arm_smmu_device *smmu)
630 {
631         int irq, start, ret = 0;
632         unsigned long ias, oas;
633         struct io_pgtable_ops *pgtbl_ops;
634         struct io_pgtable_cfg pgtbl_cfg;
635         enum io_pgtable_fmt fmt;
636         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
637         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
638
639         mutex_lock(&smmu_domain->init_mutex);
640         if (smmu_domain->smmu)
641                 goto out_unlock;
642
643         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
644                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
645                 smmu_domain->smmu = smmu;
646                 goto out_unlock;
647         }
648
649         /*
650          * Mapping the requested stage onto what we support is surprisingly
651          * complicated, mainly because the spec allows S1+S2 SMMUs without
652          * support for nested translation. That means we end up with the
653          * following table:
654          *
655          * Requested        Supported        Actual
656          *     S1               N              S1
657          *     S1             S1+S2            S1
658          *     S1               S2             S2
659          *     S1               S1             S1
660          *     N                N              N
661          *     N              S1+S2            S2
662          *     N                S2             S2
663          *     N                S1             S1
664          *
665          * Note that you can't actually request stage-2 mappings.
666          */
667         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
668                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
669         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
670                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
671
672         /*
673          * Choosing a suitable context format is even more fiddly. Until we
674          * grow some way for the caller to express a preference, and/or move
675          * the decision into the io-pgtable code where it arguably belongs,
676          * just aim for the closest thing to the rest of the system, and hope
677          * that the hardware isn't esoteric enough that we can't assume AArch64
678          * support to be a superset of AArch32 support...
679          */
680         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
681                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
682         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
683             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
684             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
685             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
686                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
687         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
688             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
689                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
690                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
691                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
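        /*
         * In other words: AArch32 long-descriptor format is the baseline
         * when supported, the short-descriptor format is preferred only
         * for stage-1 on 32-bit non-LPAE kernels, and AArch64 wins on
         * 64-bit kernels or whenever nothing else matched.
         */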
692
693         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
694                 ret = -EINVAL;
695                 goto out_unlock;
696         }
697
698         switch (smmu_domain->stage) {
699         case ARM_SMMU_DOMAIN_S1:
700                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
701                 start = smmu->num_s2_context_banks;
702                 ias = smmu->va_size;
703                 oas = smmu->ipa_size;
704                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
705                         fmt = ARM_64_LPAE_S1;
706                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
707                         fmt = ARM_32_LPAE_S1;
708                         ias = min(ias, 32UL);
709                         oas = min(oas, 40UL);
710                 } else {
711                         fmt = ARM_V7S;
712                         ias = min(ias, 32UL);
713                         oas = min(oas, 32UL);
714                 }
715                 smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
716                 break;
717         case ARM_SMMU_DOMAIN_NESTED:
718                 /*
719                  * We will likely want to change this if/when KVM gets
720                  * involved.
721                  */
722         case ARM_SMMU_DOMAIN_S2:
723                 cfg->cbar = CBAR_TYPE_S2_TRANS;
724                 start = 0;
725                 ias = smmu->ipa_size;
726                 oas = smmu->pa_size;
727                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
728                         fmt = ARM_64_LPAE_S2;
729                 } else {
730                         fmt = ARM_32_LPAE_S2;
731                         ias = min(ias, 40UL);
732                         oas = min(oas, 40UL);
733                 }
734                 if (smmu->version == ARM_SMMU_V2)
735                         smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
736                 else
737                         smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
738                 break;
739         default:
740                 ret = -EINVAL;
741                 goto out_unlock;
742         }
743         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
744                                       smmu->num_context_banks);
745         if (ret < 0)
746                 goto out_unlock;
747
748         cfg->cbndx = ret;
749         if (smmu->version < ARM_SMMU_V2) {
750                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
751                 cfg->irptndx %= smmu->num_context_irqs;
752         } else {
753                 cfg->irptndx = cfg->cbndx;
754         }
755
756         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
757                 cfg->vmid = cfg->cbndx + 1;
758         else
759                 cfg->asid = cfg->cbndx;
760
761         smmu_domain->smmu = smmu;
762         if (smmu->impl && smmu->impl->init_context) {
763                 ret = smmu->impl->init_context(smmu_domain);
764                 if (ret)
765                         goto out_unlock;
766         }
767
768         pgtbl_cfg = (struct io_pgtable_cfg) {
769                 .pgsize_bitmap  = smmu->pgsize_bitmap,
770                 .ias            = ias,
771                 .oas            = oas,
772                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
773                 .tlb            = &smmu_domain->flush_ops->tlb,
774                 .iommu_dev      = smmu->dev,
775         };
776
777         if (smmu_domain->non_strict)
778                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
779
780         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
781         if (!pgtbl_ops) {
782                 ret = -ENOMEM;
783                 goto out_clear_smmu;
784         }
785
786         /* Update the domain's page sizes to reflect the page table format */
787         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
788         domain->geometry.aperture_end = (1UL << ias) - 1;
789         domain->geometry.force_aperture = true;
790
791         /* Initialise the context bank with our page table cfg */
792         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
793         arm_smmu_write_context_bank(smmu, cfg->cbndx);
794
795         /*
796          * Request context fault interrupt. Do this last to avoid the
797          * handler seeing a half-initialised domain state.
798          */
799         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
800         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
801                                IRQF_SHARED, "arm-smmu-context-fault", domain);
802         if (ret < 0) {
803                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
804                         cfg->irptndx, irq);
805                 cfg->irptndx = INVALID_IRPTNDX;
806         }
807
808         mutex_unlock(&smmu_domain->init_mutex);
809
810         /* Publish page table ops for map/unmap */
811         smmu_domain->pgtbl_ops = pgtbl_ops;
812         return 0;
813
814 out_clear_smmu:
815         smmu_domain->smmu = NULL;
816 out_unlock:
817         mutex_unlock(&smmu_domain->init_mutex);
818         return ret;
819 }
820
821 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
822 {
823         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
824         struct arm_smmu_device *smmu = smmu_domain->smmu;
825         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
826         int ret, irq;
827
828         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
829                 return;
830
831         ret = arm_smmu_rpm_get(smmu);
832         if (ret < 0)
833                 return;
834
835         /*
836          * Disable the context bank and free the page tables before
837          * releasing the context itself.
838          */
839         smmu->cbs[cfg->cbndx].cfg = NULL;
840         arm_smmu_write_context_bank(smmu, cfg->cbndx);
841
842         if (cfg->irptndx != INVALID_IRPTNDX) {
843                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
844                 devm_free_irq(smmu->dev, irq, domain);
845         }
846
847         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
848         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
849
850         arm_smmu_rpm_put(smmu);
851 }
852
853 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
854 {
855         struct arm_smmu_domain *smmu_domain;
856
857         if (type != IOMMU_DOMAIN_UNMANAGED &&
858             type != IOMMU_DOMAIN_DMA &&
859             type != IOMMU_DOMAIN_IDENTITY)
860                 return NULL;
861         /*
862          * Allocate the domain and initialise some of its data structures.
863          * We can't really do anything meaningful until we've added a
864          * master.
865          */
866         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
867         if (!smmu_domain)
868                 return NULL;
869
870         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
871             iommu_get_dma_cookie(&smmu_domain->domain))) {
872                 kfree(smmu_domain);
873                 return NULL;
874         }
875
876         mutex_init(&smmu_domain->init_mutex);
877         spin_lock_init(&smmu_domain->cb_lock);
878
879         return &smmu_domain->domain;
880 }
881
882 static void arm_smmu_domain_free(struct iommu_domain *domain)
883 {
884         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
885
886         /*
887          * Free the domain resources. We assume that all devices have
888          * already been detached.
889          */
890         iommu_put_dma_cookie(domain);
891         arm_smmu_destroy_domain_context(domain);
892         kfree(smmu_domain);
893 }
894
895 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
896 {
897         struct arm_smmu_smr *smr = smmu->smrs + idx;
898         u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
899
900         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
901                 reg |= SMR_VALID;
902         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
903 }
904
905 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
906 {
907         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
908         u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
909                   FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
910                   FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
911
912         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
913             smmu->smrs[idx].valid)
914                 reg |= S2CR_EXIDVALID;
915         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
916 }
917
918 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
919 {
920         arm_smmu_write_s2cr(smmu, idx);
921         if (smmu->smrs)
922                 arm_smmu_write_smr(smmu, idx);
923 }
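/*
 * Note the split ownership of the "valid" bit: without extended stream IDs
 * it lives in the SMR as SMR_VALID, but with ARM_SMMU_FEAT_EXIDS it moves
 * into the S2CR as S2CR_EXIDVALID, hence both writers above checking the
 * feature flag before setting their respective bit.
 */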
924
925 /*
926  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
927  * should be called after sCR0 is written.
928  */
929 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
930 {
931         u32 smr;
932
933         if (!smmu->smrs)
934                 return;
935
936         /*
937          * SMR.ID bits may not be preserved if the corresponding MASK
938          * bits are set, so check each one separately. We can reject
939          * masters later if they try to claim IDs outside these masks.
940          */
941         smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
942         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
943         smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
944         smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
945
946         smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
947         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
948         smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
949         smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
950 }
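/*
 * For example, if streamid_mask arrives here as 0xffff but the hardware
 * only implements 15 ID bits, the read-back yields 0x7fff and
 * streamid_mask shrinks to match; the same write/read-back probe is then
 * repeated to discover how many MASK bits are actually writable.
 */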
951
952 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
953 {
954         struct arm_smmu_smr *smrs = smmu->smrs;
955         int i, free_idx = -ENOSPC;
956
957         /* Stream indexing is blissfully easy */
958         if (!smrs)
959                 return id;
960
961         /* Validating SMRs is... less so */
962         for (i = 0; i < smmu->num_mapping_groups; ++i) {
963                 if (!smrs[i].valid) {
964                         /*
965                          * Note the first free entry we come across, which
966                          * we'll claim in the end if nothing else matches.
967                          */
968                         if (free_idx < 0)
969                                 free_idx = i;
970                         continue;
971                 }
972                 /*
973                  * If the new entry is _entirely_ matched by an existing entry,
974                  * then reuse that, with the guarantee that there also cannot
975                  * be any subsequent conflicting entries. In normal use we'd
976                  * expect simply identical entries for this case, but there's
977                  * no harm in accommodating the generalisation.
978                  */
979                 if ((mask & smrs[i].mask) == mask &&
980                     !((id ^ smrs[i].id) & ~smrs[i].mask))
981                         return i;
982                 /*
983                  * If the new entry has any other overlap with an existing one,
984                  * though, then there always exists at least one stream ID
985                  * which would cause a conflict, and we can't allow that risk.
986                  */
987                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
988                         return -EINVAL;
989         }
990
991         return free_idx;
992 }
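/*
 * Worked example: given an existing SMR of id 0x400, mask 0xff (covering
 * stream IDs 0x400-0x4ff), a request for id 0x410, mask 0x0f is entirely
 * contained and reuses that index, whereas id 0x480, mask 0x1ff would
 * cover 0x400-0x5ff, a partial overlap, and is rejected with -EINVAL.
 */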
993
994 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
995 {
996         if (--smmu->s2crs[idx].count)
997                 return false;
998
999         smmu->s2crs[idx] = s2cr_init_val;
1000         if (smmu->smrs)
1001                 smmu->smrs[idx].valid = false;
1002
1003         return true;
1004 }
1005
1006 static int arm_smmu_master_alloc_smes(struct device *dev)
1007 {
1008         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1009         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1010         struct arm_smmu_device *smmu = cfg->smmu;
1011         struct arm_smmu_smr *smrs = smmu->smrs;
1012         struct iommu_group *group;
1013         int i, idx, ret;
1014
1015         mutex_lock(&smmu->stream_map_mutex);
1016         /* Figure out a viable stream map entry allocation */
1017         for_each_cfg_sme(fwspec, i, idx) {
1018                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1019                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1020
1021                 if (idx != INVALID_SMENDX) {
1022                         ret = -EEXIST;
1023                         goto out_err;
1024                 }
1025
1026                 ret = arm_smmu_find_sme(smmu, sid, mask);
1027                 if (ret < 0)
1028                         goto out_err;
1029
1030                 idx = ret;
1031                 if (smrs && smmu->s2crs[idx].count == 0) {
1032                         smrs[idx].id = sid;
1033                         smrs[idx].mask = mask;
1034                         smrs[idx].valid = true;
1035                 }
1036                 smmu->s2crs[idx].count++;
1037                 cfg->smendx[i] = (s16)idx;
1038         }
1039
1040         group = iommu_group_get_for_dev(dev);
1041         if (!group)
1042                 group = ERR_PTR(-ENOMEM);
1043         if (IS_ERR(group)) {
1044                 ret = PTR_ERR(group);
1045                 goto out_err;
1046         }
1047         iommu_group_put(group);
1048
1049         /* It worked! Now, poke the actual hardware */
1050         for_each_cfg_sme(fwspec, i, idx) {
1051                 arm_smmu_write_sme(smmu, idx);
1052                 smmu->s2crs[idx].group = group;
1053         }
1054
1055         mutex_unlock(&smmu->stream_map_mutex);
1056         return 0;
1057
1058 out_err:
1059         while (i--) {
1060                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1061                 cfg->smendx[i] = INVALID_SMENDX;
1062         }
1063         mutex_unlock(&smmu->stream_map_mutex);
1064         return ret;
1065 }
1066
1067 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1068 {
1069         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1070         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1071         int i, idx;
1072
1073         mutex_lock(&smmu->stream_map_mutex);
1074         for_each_cfg_sme(fwspec, i, idx) {
1075                 if (arm_smmu_free_sme(smmu, idx))
1076                         arm_smmu_write_sme(smmu, idx);
1077                 cfg->smendx[i] = INVALID_SMENDX;
1078         }
1079         mutex_unlock(&smmu->stream_map_mutex);
1080 }
1081
1082 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1083                                       struct iommu_fwspec *fwspec)
1084 {
1085         struct arm_smmu_device *smmu = smmu_domain->smmu;
1086         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1087         u8 cbndx = smmu_domain->cfg.cbndx;
1088         enum arm_smmu_s2cr_type type;
1089         int i, idx;
1090
1091         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1092                 type = S2CR_TYPE_BYPASS;
1093         else
1094                 type = S2CR_TYPE_TRANS;
1095
1096         for_each_cfg_sme(fwspec, i, idx) {
1097                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1098                         continue;
1099
1100                 s2cr[idx].type = type;
1101                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1102                 s2cr[idx].cbndx = cbndx;
1103                 arm_smmu_write_s2cr(smmu, idx);
1104         }
1105         return 0;
1106 }
1107
1108 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1109 {
1110         int ret;
1111         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1112         struct arm_smmu_device *smmu;
1113         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1114
1115         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1116                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1117                 return -ENXIO;
1118         }
1119
1120         /*
1121          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1122          * domains between of_xlate() and add_device() - we have no way to cope
1123          * with that, so until ARM gets converted to rely on groups and default
1124          * domains, just say no (but more politely than by dereferencing NULL).
1125          * This should be at least a WARN_ON once that's sorted.
1126          */
1127         if (!fwspec->iommu_priv)
1128                 return -ENODEV;
1129
1130         smmu = fwspec_smmu(fwspec);
1131
1132         ret = arm_smmu_rpm_get(smmu);
1133         if (ret < 0)
1134                 return ret;
1135
1136         /* Ensure that the domain is finalised */
1137         ret = arm_smmu_init_domain_context(domain, smmu);
1138         if (ret < 0)
1139                 goto rpm_put;
1140
1141         /*
1142          * Sanity check the domain. We don't support domains across
1143          * different SMMUs.
1144          */
1145         if (smmu_domain->smmu != smmu) {
1146                 dev_err(dev,
1147                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1148                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1149                 ret = -EINVAL;
1150                 goto rpm_put;
1151         }
1152
1153         /* Looks ok, so add the device to the domain */
1154         ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1155
1156 rpm_put:
1157         arm_smmu_rpm_put(smmu);
1158         return ret;
1159 }
1160
1161 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1162                         phys_addr_t paddr, size_t size, int prot)
1163 {
1164         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1165         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1166         int ret;
1167
1168         if (!ops)
1169                 return -ENODEV;
1170
1171         arm_smmu_rpm_get(smmu);
1172         ret = ops->map(ops, iova, paddr, size, prot);
1173         arm_smmu_rpm_put(smmu);
1174
1175         return ret;
1176 }
1177
1178 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1179                              size_t size, struct iommu_iotlb_gather *gather)
1180 {
1181         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1182         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1183         size_t ret;
1184
1185         if (!ops)
1186                 return 0;
1187
1188         arm_smmu_rpm_get(smmu);
1189         ret = ops->unmap(ops, iova, size, gather);
1190         arm_smmu_rpm_put(smmu);
1191
1192         return ret;
1193 }
1194
1195 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1196 {
1197         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1198         struct arm_smmu_device *smmu = smmu_domain->smmu;
1199
1200         if (smmu_domain->flush_ops) {
1201                 arm_smmu_rpm_get(smmu);
1202                 smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
1203                 arm_smmu_rpm_put(smmu);
1204         }
1205 }
1206
1207 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1208                                 struct iommu_iotlb_gather *gather)
1209 {
1210         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1211         struct arm_smmu_device *smmu = smmu_domain->smmu;
1212
1213         if (smmu_domain->flush_ops) {
1214                 arm_smmu_rpm_get(smmu);
1215                 smmu_domain->flush_ops->tlb_sync(smmu_domain);
1216                 arm_smmu_rpm_put(smmu);
1217         }
1218 }
1219
1220 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1221                                               dma_addr_t iova)
1222 {
1223         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1224         struct arm_smmu_device *smmu = smmu_domain->smmu;
1225         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1226         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1227         struct device *dev = smmu->dev;
1228         void __iomem *reg;
1229         u32 tmp;
1230         u64 phys;
1231         unsigned long va, flags;
1232         int ret, idx = cfg->cbndx;
1233
1234         ret = arm_smmu_rpm_get(smmu);
1235         if (ret < 0)
1236                 return 0;
1237
1238         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1239         va = iova & ~0xfffUL;
1240         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1241                 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1242         else
1243                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1244
1245         reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1246         if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
1247                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1248                 dev_err(dev,
1249                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1250                         &iova);
1251                 return ops->iova_to_phys(ops, iova);
1252         }
1253
1254         phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1255         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1256         if (phys & CB_PAR_F) {
1257                 dev_err(dev, "translation fault!\n");
1258                 dev_err(dev, "PAR = 0x%llx\n", phys);
1259                 return 0;
1260         }
1261
1262         arm_smmu_rpm_put(smmu);
1263
1264         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1265 }
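/*
 * The hardware-assisted lookup above drives the context bank's ATS
 * registers directly: the page-aligned VA is written to ATS1PR, ATSR is
 * polled until the ACTIVE bit clears, and the result is read back from
 * PAR. A timeout falls back to a software page table walk, while a set
 * fault bit in PAR is reported as a failed (zero) translation.
 */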
1266
1267 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1268                                         dma_addr_t iova)
1269 {
1270         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1271         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1272
1273         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1274                 return iova;
1275
1276         if (!ops)
1277                 return 0;
1278
1279         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1280                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1281                 return arm_smmu_iova_to_phys_hard(domain, iova);
1282
1283         return ops->iova_to_phys(ops, iova);
1284 }
1285
1286 static bool arm_smmu_capable(enum iommu_cap cap)
1287 {
1288         switch (cap) {
1289         case IOMMU_CAP_CACHE_COHERENCY:
1290                 /*
1291                  * Return true here as the SMMU can always send out coherent
1292                  * requests.
1293                  */
1294                 return true;
1295         case IOMMU_CAP_NOEXEC:
1296                 return true;
1297         default:
1298                 return false;
1299         }
1300 }
1301
1302 static
1303 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1304 {
1305         struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1306                                                           fwnode);
1307         put_device(dev);
1308         return dev ? dev_get_drvdata(dev) : NULL;
1309 }
1310
1311 static int arm_smmu_add_device(struct device *dev)
1312 {
1313         struct arm_smmu_device *smmu;
1314         struct arm_smmu_master_cfg *cfg;
1315         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1316         int i, ret;
1317
1318         if (using_legacy_binding) {
1319                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1320
1321                 /*
1322                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1323                  * will allocate/initialise a new one. Thus we need to update fwspec for
1324                  * later use.
1325                  */
1326                 fwspec = dev_iommu_fwspec_get(dev);
1327                 if (ret)
1328                         goto out_free;
1329         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1330                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1331         } else {
1332                 return -ENODEV;
1333         }
1334
1335         ret = -EINVAL;
1336         for (i = 0; i < fwspec->num_ids; i++) {
1337                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1338                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1339
1340                 if (sid & ~smmu->streamid_mask) {
1341                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1342                                 sid, smmu->streamid_mask);
1343                         goto out_free;
1344                 }
1345                 if (mask & ~smmu->smr_mask_mask) {
1346                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1347                                 mask, smmu->smr_mask_mask);
1348                         goto out_free;
1349                 }
1350         }
1351
1352         ret = -ENOMEM;
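        /*
         * At this point i == fwspec->num_ids (the validation loop above ran
         * to completion), so the offsetof() below sizes the flexible
         * smendx[] array for exactly that many entries, and the subsequent
         * while (i--) initialises each of them to INVALID_SMENDX.
         */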
1353         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1354                       GFP_KERNEL);
1355         if (!cfg)
1356                 goto out_free;
1357
1358         cfg->smmu = smmu;
1359         fwspec->iommu_priv = cfg;
1360         while (i--)
1361                 cfg->smendx[i] = INVALID_SMENDX;
1362
1363         ret = arm_smmu_rpm_get(smmu);
1364         if (ret < 0)
1365                 goto out_cfg_free;
1366
1367         ret = arm_smmu_master_alloc_smes(dev);
1368         arm_smmu_rpm_put(smmu);
1369
1370         if (ret)
1371                 goto out_cfg_free;
1372
1373         iommu_device_link(&smmu->iommu, dev);
1374
1375         device_link_add(dev, smmu->dev,
1376                         DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1377
1378         return 0;
1379
1380 out_cfg_free:
1381         kfree(cfg);
1382 out_free:
1383         iommu_fwspec_free(dev);
1384         return ret;
1385 }
1386
1387 static void arm_smmu_remove_device(struct device *dev)
1388 {
1389         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1390         struct arm_smmu_master_cfg *cfg;
1391         struct arm_smmu_device *smmu;
1392         int ret;
1393
1394         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1395                 return;
1396
1397         cfg  = fwspec->iommu_priv;
1398         smmu = cfg->smmu;
1399
1400         ret = arm_smmu_rpm_get(smmu);
1401         if (ret < 0)
1402                 return;
1403
1404         iommu_device_unlink(&smmu->iommu, dev);
1405         arm_smmu_master_free_smes(fwspec);
1406
1407         arm_smmu_rpm_put(smmu);
1408
1409         iommu_group_remove_device(dev);
1410         kfree(fwspec->iommu_priv);
1411         iommu_fwspec_free(dev);
1412 }
1413
1414 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1415 {
1416         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1417         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1418         struct iommu_group *group = NULL;
1419         int i, idx;
1420
1421         for_each_cfg_sme(fwspec, i, idx) {
1422                 if (group && smmu->s2crs[idx].group &&
1423                     group != smmu->s2crs[idx].group)
1424                         return ERR_PTR(-EINVAL);
1425
1426                 group = smmu->s2crs[idx].group;
1427         }
1428
1429         if (group)
1430                 return iommu_group_ref_get(group);
1431
1432         if (dev_is_pci(dev))
1433                 group = pci_device_group(dev);
1434         else if (dev_is_fsl_mc(dev))
1435                 group = fsl_mc_device_group(dev);
1436         else
1437                 group = generic_device_group(dev);
1438
1439         return group;
1440 }
1441
1442 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1443                                     enum iommu_attr attr, void *data)
1444 {
1445         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1446
1447         switch(domain->type) {
1448         case IOMMU_DOMAIN_UNMANAGED:
1449                 switch (attr) {
1450                 case DOMAIN_ATTR_NESTING:
1451                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1452                         return 0;
1453                 default:
1454                         return -ENODEV;
1455                 }
1456                 break;
1457         case IOMMU_DOMAIN_DMA:
1458                 switch (attr) {
1459                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1460                         *(int *)data = smmu_domain->non_strict;
1461                         return 0;
1462                 default:
1463                         return -ENODEV;
1464                 }
1465                 break;
1466         default:
1467                 return -EINVAL;
1468         }
1469 }
1470
1471 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1472                                     enum iommu_attr attr, void *data)
1473 {
1474         int ret = 0;
1475         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1476
1477         mutex_lock(&smmu_domain->init_mutex);
1478
1479         switch (domain->type) {
1480         case IOMMU_DOMAIN_UNMANAGED:
1481                 switch (attr) {
1482                 case DOMAIN_ATTR_NESTING:
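                             /* The stage is fixed once the domain has been attached to an SMMU */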
1483                         if (smmu_domain->smmu) {
1484                                 ret = -EPERM;
1485                                 goto out_unlock;
1486                         }
1487
1488                         if (*(int *)data)
1489                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1490                         else
1491                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1492                         break;
1493                 default:
1494                         ret = -ENODEV;
1495                 }
1496                 break;
1497         case IOMMU_DOMAIN_DMA:
1498                 switch (attr) {
1499                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1500                         smmu_domain->non_strict = *(int *)data;
1501                         break;
1502                 default:
1503                         ret = -ENODEV;
1504                 }
1505                 break;
1506         default:
1507                 ret = -EINVAL;
1508         }
1509 out_unlock:
1510         mutex_unlock(&smmu_domain->init_mutex);
1511         return ret;
1512 }
1513
1514 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1515 {
1516         u32 mask, fwid = 0;
1517
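             /*
              * Cell 0 of the specifier is the stream ID; an optional second
              * cell, or the "stream-match-mask" property, provides the SMR
              * mask. Both are packed into a single fwspec ID below.
              */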
1518         if (args->args_count > 0)
1519                 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1520
1521         if (args->args_count > 1)
1522                 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1523         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1524                 fwid |= FIELD_PREP(SMR_MASK, mask);
1525
1526         return iommu_fwspec_add_ids(dev, &fwid, 1);
1527 }
1528
1529 static void arm_smmu_get_resv_regions(struct device *dev,
1530                                       struct list_head *head)
1531 {
1532         struct iommu_resv_region *region;
1533         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1534
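             /* Reserve a software-managed IOVA window for mapping MSI doorbells */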
1535         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1536                                          prot, IOMMU_RESV_SW_MSI);
1537         if (!region)
1538                 return;
1539
1540         list_add_tail(&region->list, head);
1541
1542         iommu_dma_get_resv_regions(dev, head);
1543 }
1544
1545 static void arm_smmu_put_resv_regions(struct device *dev,
1546                                       struct list_head *head)
1547 {
1548         struct iommu_resv_region *entry, *next;
1549
1550         list_for_each_entry_safe(entry, next, head, list)
1551                 kfree(entry);
1552 }
1553
1554 static struct iommu_ops arm_smmu_ops = {
1555         .capable                = arm_smmu_capable,
1556         .domain_alloc           = arm_smmu_domain_alloc,
1557         .domain_free            = arm_smmu_domain_free,
1558         .attach_dev             = arm_smmu_attach_dev,
1559         .map                    = arm_smmu_map,
1560         .unmap                  = arm_smmu_unmap,
1561         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
1562         .iotlb_sync             = arm_smmu_iotlb_sync,
1563         .iova_to_phys           = arm_smmu_iova_to_phys,
1564         .add_device             = arm_smmu_add_device,
1565         .remove_device          = arm_smmu_remove_device,
1566         .device_group           = arm_smmu_device_group,
1567         .domain_get_attr        = arm_smmu_domain_get_attr,
1568         .domain_set_attr        = arm_smmu_domain_set_attr,
1569         .of_xlate               = arm_smmu_of_xlate,
1570         .get_resv_regions       = arm_smmu_get_resv_regions,
1571         .put_resv_regions       = arm_smmu_put_resv_regions,
1572         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1573 };
1574
1575 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1576 {
1577         int i;
1578         u32 reg;
1579
1580         /* clear global FSR */
1581         reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1582         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1583
1584         /*
1585          * Reset stream mapping groups: Initial values mark all SMRn as
1586          * invalid and all S2CRn as bypass unless overridden.
1587          */
1588         for (i = 0; i < smmu->num_mapping_groups; ++i)
1589                 arm_smmu_write_sme(smmu, i);
1590
1591         /* Make sure all context banks are disabled and clear CB_FSR */
1592         for (i = 0; i < smmu->num_context_banks; ++i) {
1593                 arm_smmu_write_context_bank(smmu, i);
1594                 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
1595         }
1596
1597         /* Invalidate the TLB, just in case */
1598         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1599         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1600
1601         reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1602
1603         /* Enable fault reporting */
1604         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1605
1606         /* Disable TLB broadcasting. */
1607         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1608
1609         /* Enable client access, handling unmatched streams as appropriate */
1610         reg &= ~sCR0_CLIENTPD;
1611         if (disable_bypass)
1612                 reg |= sCR0_USFCFG;
1613         else
1614                 reg &= ~sCR0_USFCFG;
1615
1616         /* Disable forced broadcasting */
1617         reg &= ~sCR0_FB;
1618
1619         /* Don't upgrade barriers */
1620         reg &= ~(sCR0_BSU);
1621
1622         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1623                 reg |= sCR0_VMID16EN;
1624
1625         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1626                 reg |= sCR0_EXIDENABLE;
1627
1628         if (smmu->impl && smmu->impl->reset)
1629                 smmu->impl->reset(smmu);
1630
1631         /* Push the button: sync outstanding TLB maintenance, then enable the SMMU */
1632         arm_smmu_tlb_sync_global(smmu);
1633         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1634 }
1635
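 /*
  * Translate the address-size encodings found in the ID registers (IAS, OAS,
  * UBS) into a number of bits; reserved/larger encodings are clamped to 48.
  */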
1636 static int arm_smmu_id_size_to_bits(int size)
1637 {
1638         switch (size) {
1639         case 0:
1640                 return 32;
1641         case 1:
1642                 return 36;
1643         case 2:
1644                 return 40;
1645         case 3:
1646                 return 42;
1647         case 4:
1648                 return 44;
1649         case 5:
1650         default:
1651                 return 48;
1652         }
1653 }
1654
1655 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1656 {
1657         unsigned int size;
1658         u32 id;
1659         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1660         int i;
1661
1662         dev_notice(smmu->dev, "probing hardware configuration...\n");
1663         dev_notice(smmu->dev, "SMMUv%d with:\n",
1664                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1665
1666         /* ID0 */
1667         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1668
1669         /* Restrict available stages based on module parameter */
1670         if (force_stage == 1)
1671                 id &= ~(ID0_S2TS | ID0_NTS);
1672         else if (force_stage == 2)
1673                 id &= ~(ID0_S1TS | ID0_NTS);
1674
1675         if (id & ID0_S1TS) {
1676                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1677                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1678         }
1679
1680         if (id & ID0_S2TS) {
1681                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1682                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1683         }
1684
1685         if (id & ID0_NTS) {
1686                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1687                 dev_notice(smmu->dev, "\tnested translation\n");
1688         }
1689
1690         if (!(smmu->features &
1691                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1692                 dev_err(smmu->dev, "\tno translation support!\n");
1693                 return -ENODEV;
1694         }
1695
1696         if ((id & ID0_S1TS) &&
1697                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1698                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1699                 dev_notice(smmu->dev, "\taddress translation ops\n");
1700         }
1701
1702         /*
1703          * In order for DMA API calls to work properly, we must defer to what
1704          * the FW says about coherency, regardless of what the hardware claims.
1705          * Fortunately, this also opens up a workaround for systems where the
1706          * ID register value has ended up configured incorrectly.
1707          */
1708         cttw_reg = !!(id & ID0_CTTW);
1709         if (cttw_fw || cttw_reg)
1710                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1711                            cttw_fw ? "" : "non-");
1712         if (cttw_fw != cttw_reg)
1713                 dev_notice(smmu->dev,
1714                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1715
1716         /* Max. number of entries we have for stream matching/indexing */
1717         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1718                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1719                 size = 1 << 16;
1720         } else {
1721                 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1722         }
1723         smmu->streamid_mask = size - 1;
1724         if (id & ID0_SMS) {
1725                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1726                 size = FIELD_GET(ID0_NUMSMRG, id);
1727                 if (size == 0) {
1728                         dev_err(smmu->dev,
1729                                 "stream-matching supported, but no SMRs present!\n");
1730                         return -ENODEV;
1731                 }
1732
1733                 /* Zero-initialised to mark as invalid */
1734                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1735                                           GFP_KERNEL);
1736                 if (!smmu->smrs)
1737                         return -ENOMEM;
1738
1739                 dev_notice(smmu->dev,
1740                    "\tstream matching with %u register groups\n", size);
1741         }
1742         /* s2cr->type == 0 means translation, so initialise explicitly */
1743         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1744                                          GFP_KERNEL);
1745         if (!smmu->s2crs)
1746                 return -ENOMEM;
1747         for (i = 0; i < size; i++)
1748                 smmu->s2crs[i] = s2cr_init_val;
1749
1750         smmu->num_mapping_groups = size;
1751         mutex_init(&smmu->stream_map_mutex);
1752         spin_lock_init(&smmu->global_sync_lock);
1753
1754         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1755                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1756                 if (!(id & ID0_PTFS_NO_AARCH32S))
1757                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1758         }
1759
1760         /* ID1 */
1761         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
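             /* ID1.PAGESIZE: set means 64KB register pages, clear means 4KB */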
1762         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1763
1764         /* Check for size mismatch of SMMU address space from mapped region */
1765         size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1766         if (smmu->numpage != 2 * size << smmu->pgshift)
1767                 dev_warn(smmu->dev,
1768                         "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1769                         2 * size << smmu->pgshift, smmu->numpage);
1770         /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1771         smmu->numpage = size;
1772
1773         smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1774         smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1775         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1776                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1777                 return -ENODEV;
1778         }
1779         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1780                    smmu->num_context_banks, smmu->num_s2_context_banks);
1781         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1782                                  sizeof(*smmu->cbs), GFP_KERNEL);
1783         if (!smmu->cbs)
1784                 return -ENOMEM;
1785
1786         /* ID2 */
1787         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1788         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
1789         smmu->ipa_size = size;
1790
1791         /* The output mask is also applied for bypass */
1792         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
1793         smmu->pa_size = size;
1794
1795         if (id & ID2_VMID16)
1796                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1797
1798         /*
1799          * What the page table walker can address actually depends on which
1800          * descriptor format is in use, but since a) we don't know that yet,
1801          * and b) it can vary per context bank, this will have to do...
1802          */
1803         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1804                 dev_warn(smmu->dev,
1805                          "failed to set DMA mask for table walker\n");
1806
1807         if (smmu->version < ARM_SMMU_V2) {
1808                 smmu->va_size = smmu->ipa_size;
1809                 if (smmu->version == ARM_SMMU_V1_64K)
1810                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1811         } else {
1812                 size = FIELD_GET(ID2_UBS, id);
1813                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1814                 if (id & ID2_PTFS_4K)
1815                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1816                 if (id & ID2_PTFS_16K)
1817                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1818                 if (id & ID2_PTFS_64K)
1819                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1820         }
1821
1822         /* Now we've corralled the various formats, what'll it do? */
1823         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1824                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1825         if (smmu->features &
1826             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1827                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1828         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1829                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1830         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1831                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1832
1833         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1834                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1835         else
1836                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1837         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1838                    smmu->pgsize_bitmap);
1839
1840
1841         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1842                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1843                            smmu->va_size, smmu->ipa_size);
1844
1845         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1846                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1847                            smmu->ipa_size, smmu->pa_size);
1848
1849         if (smmu->impl && smmu->impl->cfg_probe)
1850                 return smmu->impl->cfg_probe(smmu);
1851
1852         return 0;
1853 }
1854
1855 struct arm_smmu_match_data {
1856         enum arm_smmu_arch_version version;
1857         enum arm_smmu_implementation model;
1858 };
1859
1860 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1861 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1862
1863 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1864 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1865 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1866 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1867 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1868 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1869
1870 static const struct of_device_id arm_smmu_of_match[] = {
1871         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1872         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1873         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1874         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1875         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1876         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1877         { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1878         { },
1879 };
1880
1881 #ifdef CONFIG_ACPI
1882 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1883 {
1884         int ret = 0;
1885
1886         switch (model) {
1887         case ACPI_IORT_SMMU_V1:
1888         case ACPI_IORT_SMMU_CORELINK_MMU400:
1889                 smmu->version = ARM_SMMU_V1;
1890                 smmu->model = GENERIC_SMMU;
1891                 break;
1892         case ACPI_IORT_SMMU_CORELINK_MMU401:
1893                 smmu->version = ARM_SMMU_V1_64K;
1894                 smmu->model = GENERIC_SMMU;
1895                 break;
1896         case ACPI_IORT_SMMU_V2:
1897                 smmu->version = ARM_SMMU_V2;
1898                 smmu->model = GENERIC_SMMU;
1899                 break;
1900         case ACPI_IORT_SMMU_CORELINK_MMU500:
1901                 smmu->version = ARM_SMMU_V2;
1902                 smmu->model = ARM_MMU500;
1903                 break;
1904         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1905                 smmu->version = ARM_SMMU_V2;
1906                 smmu->model = CAVIUM_SMMUV2;
1907                 break;
1908         default:
1909                 ret = -ENODEV;
1910         }
1911
1912         return ret;
1913 }
1914
1915 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1916                                       struct arm_smmu_device *smmu)
1917 {
1918         struct device *dev = smmu->dev;
1919         struct acpi_iort_node *node =
1920                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1921         struct acpi_iort_smmu *iort_smmu;
1922         int ret;
1923
1924         /* Retrieve SMMU1/2 specific data */
1925         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1926
1927         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1928         if (ret < 0)
1929                 return ret;
1930
1931         /* Ignore the configuration access interrupt */
1932         smmu->num_global_irqs = 1;
1933
1934         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1935                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1936
1937         return 0;
1938 }
1939 #else
1940 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1941                                              struct arm_smmu_device *smmu)
1942 {
1943         return -ENODEV;
1944 }
1945 #endif
1946
1947 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1948                                     struct arm_smmu_device *smmu)
1949 {
1950         const struct arm_smmu_match_data *data;
1951         struct device *dev = &pdev->dev;
1952         bool legacy_binding;
1953
1954         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1955                                  &smmu->num_global_irqs)) {
1956                 dev_err(dev, "missing #global-interrupts property\n");
1957                 return -ENODEV;
1958         }
1959
1960         data = of_device_get_match_data(dev);
1961         smmu->version = data->version;
1962         smmu->model = data->model;
1963
1964         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
1965         if (legacy_binding && !using_generic_binding) {
1966                 if (!using_legacy_binding)
1967                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
1968                 using_legacy_binding = true;
1969         } else if (!legacy_binding && !using_legacy_binding) {
1970                 using_generic_binding = true;
1971         } else {
1972                 dev_err(dev, "not probing due to mismatched DT properties\n");
1973                 return -ENODEV;
1974         }
1975
1976         if (of_dma_is_coherent(dev->of_node))
1977                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1978
1979         return 0;
1980 }
1981
1982 static void arm_smmu_bus_init(void)
1983 {
1984         /* Oh, for a proper bus abstraction */
1985         if (!iommu_present(&platform_bus_type))
1986                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
1987 #ifdef CONFIG_ARM_AMBA
1988         if (!iommu_present(&amba_bustype))
1989                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
1990 #endif
1991 #ifdef CONFIG_PCI
1992         if (!iommu_present(&pci_bus_type)) {
1993                 pci_request_acs();
1994                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
1995         }
1996 #endif
1997 #ifdef CONFIG_FSL_MC_BUS
1998         if (!iommu_present(&fsl_mc_bus_type))
1999                 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2000 #endif
2001 }
2002
2003 static int arm_smmu_device_probe(struct platform_device *pdev)
2004 {
2005         struct resource *res;
2006         resource_size_t ioaddr;
2007         struct arm_smmu_device *smmu;
2008         struct device *dev = &pdev->dev;
2009         int num_irqs, i, err;
2010
2011         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2012         if (!smmu) {
2013                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2014                 return -ENOMEM;
2015         }
2016         smmu->dev = dev;
2017
2018         if (dev->of_node)
2019                 err = arm_smmu_device_dt_probe(pdev, smmu);
2020         else
2021                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2022
2023         if (err)
2024                 return err;
2025
2026         smmu = arm_smmu_impl_init(smmu);
2027         if (IS_ERR(smmu))
2028                 return PTR_ERR(smmu);
2029
2030         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2031         smmu->base = devm_ioremap_resource(dev, res);
2032         if (IS_ERR(smmu->base))
2033                 return PTR_ERR(smmu->base);
2034         ioaddr = res->start;
2035         /*
2036          * The resource size should effectively match the value of SMMU_TOP;
2037          * stash that temporarily until we know PAGESIZE to validate it with.
2038          */
2039         smmu->numpage = resource_size(res);
2040
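             /* Count all IRQ resources; those beyond the global ones are context interrupts */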
2041         num_irqs = 0;
2042         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2043                 num_irqs++;
2044                 if (num_irqs > smmu->num_global_irqs)
2045                         smmu->num_context_irqs++;
2046         }
2047
2048         if (!smmu->num_context_irqs) {
2049                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2050                         num_irqs, smmu->num_global_irqs + 1);
2051                 return -ENODEV;
2052         }
2053
2054         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2055                                   GFP_KERNEL);
2056         if (!smmu->irqs) {
2057                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2058                 return -ENOMEM;
2059         }
2060
2061         for (i = 0; i < num_irqs; ++i) {
2062                 int irq = platform_get_irq(pdev, i);
2063
2064                 if (irq < 0) {
2065                         dev_err(dev, "failed to get irq index %d\n", i);
2066                         return -ENODEV;
2067                 }
2068                 smmu->irqs[i] = irq;
2069         }
2070
2071         err = devm_clk_bulk_get_all(dev, &smmu->clks);
2072         if (err < 0) {
2073                 dev_err(dev, "failed to get clocks %d\n", err);
2074                 return err;
2075         }
2076         smmu->num_clks = err;
2077
2078         err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2079         if (err)
2080                 return err;
2081
2082         err = arm_smmu_device_cfg_probe(smmu);
2083         if (err)
2084                 return err;
2085
2086         if (smmu->version == ARM_SMMU_V2) {
2087                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2088                         dev_err(dev,
2089                               "found only %d context irq(s) but %d required\n",
2090                               smmu->num_context_irqs, smmu->num_context_banks);
2091                         return -ENODEV;
2092                 }
2093
2094                 /* Ignore superfluous interrupts */
2095                 smmu->num_context_irqs = smmu->num_context_banks;
2096         }
2097
2098         for (i = 0; i < smmu->num_global_irqs; ++i) {
2099                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2100                                        arm_smmu_global_fault,
2101                                        IRQF_SHARED,
2102                                        "arm-smmu global fault",
2103                                        smmu);
2104                 if (err) {
2105                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2106                                 i, smmu->irqs[i]);
2107                         return err;
2108                 }
2109         }
2110
2111         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2112                                      "smmu.%pa", &ioaddr);
2113         if (err) {
2114                 dev_err(dev, "Failed to register iommu in sysfs\n");
2115                 return err;
2116         }
2117
2118         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2119         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2120
2121         err = iommu_device_register(&smmu->iommu);
2122         if (err) {
2123                 dev_err(dev, "Failed to register iommu\n");
2124                 return err;
2125         }
2126
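             /*
              * Bring the hardware to a known state and probe which SMR ID/mask
              * bits are actually implemented.
              */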
2127         platform_set_drvdata(pdev, smmu);
2128         arm_smmu_device_reset(smmu);
2129         arm_smmu_test_smr_masks(smmu);
2130
2131         /*
2132          * We want to avoid touching dev->power.lock in fastpaths unless
2133          * it's really going to do something useful - pm_runtime_enabled()
2134          * can serve as an ideal proxy for that decision. So, conditionally
2135          * enable pm_runtime.
2136          */
2137         if (dev->pm_domain) {
2138                 pm_runtime_set_active(dev);
2139                 pm_runtime_enable(dev);
2140         }
2141
2142         /*
2143          * For ACPI and generic DT bindings, an SMMU will be probed before
2144          * any device which might need it, so we want the bus ops in place
2145          * ready to handle default domain setup as soon as any SMMU exists.
2146          */
2147         if (!using_legacy_binding)
2148                 arm_smmu_bus_init();
2149
2150         return 0;
2151 }
2152
2153 /*
2154  * With the legacy DT binding in play, though, we have no guarantees about
2155  * probe order, but then we're also not doing default domains, so we can
2156  * delay setting bus ops until we're sure every possible SMMU is ready,
2157  * and that way ensure that no add_device() calls get missed.
2158  */
2159 static int arm_smmu_legacy_bus_init(void)
2160 {
2161         if (using_legacy_binding)
2162                 arm_smmu_bus_init();
2163         return 0;
2164 }
2165 device_initcall_sync(arm_smmu_legacy_bus_init);
2166
2167 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2168 {
2169         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2170
2171         if (!smmu)
2172                 return;
2173
2174         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2175                 dev_err(&pdev->dev, "removing device with active domains!\n");
2176
2177         arm_smmu_rpm_get(smmu);
2178         /* Turn the thing off */
2179         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
2180         arm_smmu_rpm_put(smmu);
2181
2182         if (pm_runtime_enabled(smmu->dev))
2183                 pm_runtime_force_suspend(smmu->dev);
2184         else
2185                 clk_bulk_disable(smmu->num_clks, smmu->clks);
2186
2187         clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2188 }
2189
2190 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2191 {
2192         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2193         int ret;
2194
2195         ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2196         if (ret)
2197                 return ret;
2198
2199         arm_smmu_device_reset(smmu);
2200
2201         return 0;
2202 }
2203
2204 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2205 {
2206         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2207
2208         clk_bulk_disable(smmu->num_clks, smmu->clks);
2209
2210         return 0;
2211 }
2212
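 /*
  * System sleep hooks: if the SMMU is already runtime-suspended, its clocks
  * are off and it will be fully reset on runtime resume, so there is nothing
  * extra to do here.
  */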
2213 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2214 {
2215         if (pm_runtime_suspended(dev))
2216                 return 0;
2217
2218         return arm_smmu_runtime_resume(dev);
2219 }
2220
2221 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2222 {
2223         if (pm_runtime_suspended(dev))
2224                 return 0;
2225
2226         return arm_smmu_runtime_suspend(dev);
2227 }
2228
2229 static const struct dev_pm_ops arm_smmu_pm_ops = {
2230         SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2231         SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2232                            arm_smmu_runtime_resume, NULL)
2233 };
2234
2235 static struct platform_driver arm_smmu_driver = {
2236         .driver = {
2237                 .name                   = "arm-smmu",
2238                 .of_match_table         = of_match_ptr(arm_smmu_of_match),
2239                 .pm                     = &arm_smmu_pm_ops,
2240                 .suppress_bind_attrs    = true,
2241         },
2242         .probe  = arm_smmu_device_probe,
2243         .shutdown = arm_smmu_device_shutdown,
2244 };
2245 builtin_platform_driver(arm_smmu_driver);