[linux.git] drivers/iommu/arm-smmu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *      - SMMUv1 and v2 implementations
11  *      - Stream-matching and stream-indexing
12  *      - v7/v8 long-descriptor format
13  *      - Non-secure access to the SMMU
14  *      - Context fault reporting
15  *      - Extended Stream ID (16 bit)
16  */
17
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/init.h>
31 #include <linux/moduleparam.h>
32 #include <linux/of.h>
33 #include <linux/of_address.h>
34 #include <linux/of_device.h>
35 #include <linux/of_iommu.h>
36 #include <linux/pci.h>
37 #include <linux/platform_device.h>
38 #include <linux/pm_runtime.h>
39 #include <linux/slab.h>
40
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
43
44 #include "arm-smmu.h"
45
46 /*
47  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
48  * global register space are still, in fact, using a hypervisor to mediate it
49  * by trapping and emulating register accesses. Sadly, some deployed versions
50  * of said trapping code have bugs wherein they go horribly wrong for stores
51  * using r31 (i.e. XZR/WZR) as the source register.
52  */
53 #define QCOM_DUMMY_VAL -1
54
55 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
56 #define TLB_SPIN_COUNT                  10
57
58 #define MSI_IOVA_BASE                   0x8000000
59 #define MSI_IOVA_LENGTH                 0x100000
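/*
 * This IOVA window is advertised to the IOMMU core by
 * arm_smmu_get_resv_regions() below as an IOMMU_RESV_SW_MSI region, i.e.
 * the range in which software-managed MSI doorbells get mapped.
 */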
60
61 static int force_stage;
62 /*
63  * not really modular, but the easiest way to keep compat with existing
64  * bootargs behaviour is to continue using module_param() here.
65  */
66 module_param(force_stage, int, S_IRUGO);
67 MODULE_PARM_DESC(force_stage,
68         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
69 static bool disable_bypass =
70         IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
71 module_param(disable_bypass, bool, S_IRUGO);
72 MODULE_PARM_DESC(disable_bypass,
73         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
74
75 struct arm_smmu_s2cr {
76         struct iommu_group              *group;
77         int                             count;
78         enum arm_smmu_s2cr_type         type;
79         enum arm_smmu_s2cr_privcfg      privcfg;
80         u8                              cbndx;
81 };
82
83 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
84         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
85 }
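/*
 * The reset value for a stream-to-context register: an unclaimed stream
 * either faults or bypasses, depending on the disable_bypass parameter.
 */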
86
87 struct arm_smmu_smr {
88         u16                             mask;
89         u16                             id;
90         bool                            valid;
91 };
92
93 struct arm_smmu_cb {
94         u64                             ttbr[2];
95         u32                             tcr[2];
96         u32                             mair[2];
97         struct arm_smmu_cfg             *cfg;
98 };
99
100 struct arm_smmu_master_cfg {
101         struct arm_smmu_device          *smmu;
102         s16                             smendx[];
103 };
104 #define INVALID_SMENDX                  -1
105 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
106 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
107 #define fwspec_smendx(fw, i) \
108         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
109 #define for_each_cfg_sme(fw, i, idx) \
110         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
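/*
 * for_each_cfg_sme() walks a master's fwspec stream IDs, updating both the
 * index i and the corresponding stream map entry idx on every iteration
 * (note the comma operator in the loop condition). A caller typically looks
 * like the sketch below (illustrative only; idx stays INVALID_SMENDX until
 * arm_smmu_master_alloc_smes() has claimed an entry):
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;
 *		arm_smmu_write_sme(smmu, idx);
 *	}
 */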
111
112 static bool using_legacy_binding, using_generic_binding;
113
114 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
115 {
116         if (pm_runtime_enabled(smmu->dev))
117                 return pm_runtime_get_sync(smmu->dev);
118
119         return 0;
120 }
121
122 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
123 {
124         if (pm_runtime_enabled(smmu->dev))
125                 pm_runtime_put(smmu->dev);
126 }
127
128 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
129 {
130         return container_of(dom, struct arm_smmu_domain, domain);
131 }
132
133 static struct device_node *dev_get_dev_node(struct device *dev)
134 {
135         if (dev_is_pci(dev)) {
136                 struct pci_bus *bus = to_pci_dev(dev)->bus;
137
138                 while (!pci_is_root_bus(bus))
139                         bus = bus->parent;
140                 return of_node_get(bus->bridge->parent->of_node);
141         }
142
143         return of_node_get(dev->of_node);
144 }
145
146 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
147 {
148         *((__be32 *)data) = cpu_to_be32(alias);
149         return 0; /* Continue walking */
150 }
151
152 static int __find_legacy_master_phandle(struct device *dev, void *data)
153 {
154         struct of_phandle_iterator *it = *(void **)data;
155         struct device_node *np = it->node;
156         int err;
157
158         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
159                             "#stream-id-cells", 0)
160                 if (it->node == np) {
161                         *(void **)data = dev;
162                         return 1;
163                 }
164         it->node = np;
165         return err == -ENOENT ? 0 : err;
166 }
167
168 static struct platform_driver arm_smmu_driver;
169 static struct iommu_ops arm_smmu_ops;
170
171 static int arm_smmu_register_legacy_master(struct device *dev,
172                                            struct arm_smmu_device **smmu)
173 {
174         struct device *smmu_dev;
175         struct device_node *np;
176         struct of_phandle_iterator it;
177         void *data = &it;
178         u32 *sids;
179         __be32 pci_sid;
180         int err;
181
182         np = dev_get_dev_node(dev);
183         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
184                 of_node_put(np);
185                 return -ENODEV;
186         }
187
188         it.node = np;
189         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
190                                      __find_legacy_master_phandle);
191         smmu_dev = data;
192         of_node_put(np);
193         if (err == 0)
194                 return -ENODEV;
195         if (err < 0)
196                 return err;
197
198         if (dev_is_pci(dev)) {
199                 /* "mmu-masters" assumes Stream ID == Requester ID */
200                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
201                                        &pci_sid);
202                 it.cur = &pci_sid;
203                 it.cur_count = 1;
204         }
205
206         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
207                                 &arm_smmu_ops);
208         if (err)
209                 return err;
210
211         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
212         if (!sids)
213                 return -ENOMEM;
214
215         *smmu = dev_get_drvdata(smmu_dev);
216         of_phandle_iterator_args(&it, sids, it.cur_count);
217         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
218         kfree(sids);
219         return err;
220 }
221
222 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
223 {
224         int idx;
225
226         do {
227                 idx = find_next_zero_bit(map, end, start);
228                 if (idx == end)
229                         return -ENOSPC;
230         } while (test_and_set_bit(idx, map));
231
232         return idx;
233 }
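/*
 * The loop above makes bitmap allocation safe against concurrent callers
 * without a lock: if another thread claims the bit between
 * find_next_zero_bit() and test_and_set_bit(), the search simply retries.
 */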
234
235 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
236 {
237         clear_bit(idx, map);
238 }
239
240 /* Wait for any pending TLB invalidations to complete */
241 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
242                                 int sync, int status)
243 {
244         unsigned int spin_cnt, delay;
245         u32 reg;
246
247         arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
248         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
249                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
250                         reg = arm_smmu_readl(smmu, page, status);
251                         if (!(reg & sTLBGSTATUS_GSACTIVE))
252                                 return;
253                         cpu_relax();
254                 }
255                 udelay(delay);
256         }
257         dev_err_ratelimited(smmu->dev,
258                             "TLB sync timed out -- SMMU may be deadlocked\n");
259 }
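/*
 * The wait above busy-polls the status register TLB_SPIN_COUNT times, then
 * backs off with exponentially growing udelay()s (1us, 2us, 4us, ...),
 * giving up after roughly TLB_LOOP_TIMEOUT microseconds (~1s) in total.
 */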
260
261 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
262 {
263         unsigned long flags;
264
265         spin_lock_irqsave(&smmu->global_sync_lock, flags);
266         __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
267                             ARM_SMMU_GR0_sTLBGSTATUS);
268         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
269 }
270
271 static void arm_smmu_tlb_sync_context(void *cookie)
272 {
273         struct arm_smmu_domain *smmu_domain = cookie;
274         struct arm_smmu_device *smmu = smmu_domain->smmu;
275         unsigned long flags;
276
277         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
278         __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
279                             ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
280         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
281 }
282
283 static void arm_smmu_tlb_sync_vmid(void *cookie)
284 {
285         struct arm_smmu_domain *smmu_domain = cookie;
286
287         arm_smmu_tlb_sync_global(smmu_domain->smmu);
288 }
289
290 static void arm_smmu_tlb_inv_context_s1(void *cookie)
291 {
292         struct arm_smmu_domain *smmu_domain = cookie;
293         /*
294          * The TLBI write may be relaxed, so ensure that PTEs cleared by the
295          * current CPU are visible beforehand.
296          */
297         wmb();
298         arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
299                           ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
300         arm_smmu_tlb_sync_context(cookie);
301 }
302
303 static void arm_smmu_tlb_inv_context_s2(void *cookie)
304 {
305         struct arm_smmu_domain *smmu_domain = cookie;
306         struct arm_smmu_device *smmu = smmu_domain->smmu;
307
308         /* See above */
309         wmb();
310         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
311         arm_smmu_tlb_sync_global(smmu);
312 }
313
314 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
315                                       size_t granule, bool leaf, void *cookie)
316 {
317         struct arm_smmu_domain *smmu_domain = cookie;
318         struct arm_smmu_device *smmu = smmu_domain->smmu;
319         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
320         int reg, idx = cfg->cbndx;
321
322         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
323                 wmb();
324
325         reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
326
327         if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
328                 iova = (iova >> 12) << 12;
329                 iova |= cfg->asid;
330                 do {
331                         arm_smmu_cb_write(smmu, idx, reg, iova);
332                         iova += granule;
333                 } while (size -= granule);
334         } else {
335                 iova >>= 12;
336                 iova |= (u64)cfg->asid << 48;
337                 do {
338                         arm_smmu_cb_writeq(smmu, idx, reg, iova);
339                         iova += granule >> 12;
340                 } while (size -= granule);
341         }
342 }
343
344 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
345                                       size_t granule, bool leaf, void *cookie)
346 {
347         struct arm_smmu_domain *smmu_domain = cookie;
348         struct arm_smmu_device *smmu = smmu_domain->smmu;
349         int reg, idx = smmu_domain->cfg.cbndx;
350
351         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
352                 wmb();
353
354         reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
355         iova >>= 12;
356         do {
357                 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
358                         arm_smmu_cb_writeq(smmu, idx, reg, iova);
359                 else
360                         arm_smmu_cb_write(smmu, idx, reg, iova);
361                 iova += granule >> 12;
362         } while (size -= granule);
363 }
364
365 /*
366  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
367  * almost negligible, but the benefit of getting the first one in as far ahead
368  * of the sync as possible is significant, hence we don't just make this a
369  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
370  */
371 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
372                                          size_t granule, bool leaf, void *cookie)
373 {
374         struct arm_smmu_domain *smmu_domain = cookie;
375         struct arm_smmu_device *smmu = smmu_domain->smmu;
376
377         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
378                 wmb();
379
380         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
381 }
382
383 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
384                                   size_t granule, void *cookie)
385 {
386         struct arm_smmu_domain *smmu_domain = cookie;
387         const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
388
389         ops->tlb_inv_range(iova, size, granule, false, cookie);
390         ops->tlb_sync(cookie);
391 }
392
393 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
394                                   size_t granule, void *cookie)
395 {
396         struct arm_smmu_domain *smmu_domain = cookie;
397         const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
398
399         ops->tlb_inv_range(iova, size, granule, true, cookie);
400         ops->tlb_sync(cookie);
401 }
402
403 static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
404                                   unsigned long iova, size_t granule,
405                                   void *cookie)
406 {
407         struct arm_smmu_domain *smmu_domain = cookie;
408         const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
409
410         ops->tlb_inv_range(iova, granule, granule, true, cookie);
411 }
412
413 static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
414         .tlb = {
415                 .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
416                 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
417                 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
418                 .tlb_add_page   = arm_smmu_tlb_add_page,
419         },
420         .tlb_inv_range          = arm_smmu_tlb_inv_range_s1,
421         .tlb_sync               = arm_smmu_tlb_sync_context,
422 };
423
424 static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
425         .tlb = {
426                 .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
427                 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
428                 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
429                 .tlb_add_page   = arm_smmu_tlb_add_page,
430         },
431         .tlb_inv_range          = arm_smmu_tlb_inv_range_s2,
432         .tlb_sync               = arm_smmu_tlb_sync_context,
433 };
434
435 static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
436         .tlb = {
437                 .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
438                 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
439                 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
440                 .tlb_add_page   = arm_smmu_tlb_add_page,
441         },
442         .tlb_inv_range          = arm_smmu_tlb_inv_vmid_nosync,
443         .tlb_sync               = arm_smmu_tlb_sync_vmid,
444 };
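/*
 * Summary of the three sets of TLB ops above: stage 1 invalidates by
 * ASID/VA within the context bank and syncs per-context; stage 2 on SMMUv2
 * invalidates by IPA within the context bank (TLBIIPAS2/L), again with a
 * per-context sync; SMMUv1 has no per-context stage-2 invalidation, so
 * ranges fall back to a global TLBIVMID followed by a global sync.
 */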
445
446 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
447 {
448         u32 fsr, fsynr, cbfrsynra;
449         unsigned long iova;
450         struct iommu_domain *domain = dev;
451         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
452         struct arm_smmu_device *smmu = smmu_domain->smmu;
453         int idx = smmu_domain->cfg.cbndx;
454
455         fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
456         if (!(fsr & FSR_FAULT))
457                 return IRQ_NONE;
458
459         fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
460         iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
461         cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
462
463         dev_err_ratelimited(smmu->dev,
464         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
465                             fsr, iova, fsynr, cbfrsynra, idx);
466
467         arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
468         return IRQ_HANDLED;
469 }
470
471 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
472 {
473         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
474         struct arm_smmu_device *smmu = dev;
475
476         gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
477         gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
478         gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
479         gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
480
481         if (!gfsr)
482                 return IRQ_NONE;
483
484         dev_err_ratelimited(smmu->dev,
485                 "Unexpected global fault, this could be serious\n");
486         dev_err_ratelimited(smmu->dev,
487                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
488                 gfsr, gfsynr0, gfsynr1, gfsynr2);
489
490         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
491         return IRQ_HANDLED;
492 }
493
494 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
495                                        struct io_pgtable_cfg *pgtbl_cfg)
496 {
497         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
498         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
499         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
500
501         cb->cfg = cfg;
502
503         /* TCR */
504         if (stage1) {
505                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
506                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
507                 } else {
508                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
509                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
510                         cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
511                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
512                                 cb->tcr[1] |= TCR2_AS;
513                 }
514         } else {
515                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
516         }
517
518         /* TTBRs */
519         if (stage1) {
520                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
521                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
522                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
523                 } else {
524                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
525                         cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
526                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
527                         cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
528                 }
529         } else {
530                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
531         }
532
533         /* MAIRs (stage-1 only) */
534         if (stage1) {
535                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
536                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
537                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
538                 } else {
539                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
540                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
541                 }
542         }
543 }
544
545 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
546 {
547         u32 reg;
548         bool stage1;
549         struct arm_smmu_cb *cb = &smmu->cbs[idx];
550         struct arm_smmu_cfg *cfg = cb->cfg;
551
552         /* Unassigned context banks only need disabling */
553         if (!cfg) {
554                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
555                 return;
556         }
557
558         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
559
560         /* CBA2R */
561         if (smmu->version > ARM_SMMU_V1) {
562                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
563                         reg = CBA2R_VA64;
564                 else
565                         reg = 0;
566                 /* 16-bit VMIDs live in CBA2R */
567                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
568                         reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
569
570                 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
571         }
572
573         /* CBAR */
574         reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
575         if (smmu->version < ARM_SMMU_V2)
576                 reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
577
578         /*
579          * Use the weakest shareability/memory types, so they are
580          * overridden by the ttbcr/pte.
581          */
582         if (stage1) {
583                 reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
584                         FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
585         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
586                 /* 8-bit VMIDs live in CBAR */
587                 reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
588         }
589         arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
590
591         /*
592          * TCR
593          * We must write this before the TTBRs, since it determines the
594          * access behaviour of some fields (in particular, ASID[15:8]).
595          */
596         if (stage1 && smmu->version > ARM_SMMU_V1)
597                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
598         arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
599
600         /* TTBRs */
601         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
602                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
603                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
604                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
605         } else {
606                 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
607                 if (stage1)
608                         arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
609                                            cb->ttbr[1]);
610         }
611
612         /* MAIRs (stage-1 only) */
613         if (stage1) {
614                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
615                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
616         }
617
618         /* SCTLR */
619         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
620         if (stage1)
621                 reg |= SCTLR_S1_ASIDPNE;
622         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
623                 reg |= SCTLR_E;
624
625         arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
626 }
627
628 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
629                                         struct arm_smmu_device *smmu)
630 {
631         int irq, start, ret = 0;
632         unsigned long ias, oas;
633         struct io_pgtable_ops *pgtbl_ops;
634         struct io_pgtable_cfg pgtbl_cfg;
635         enum io_pgtable_fmt fmt;
636         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
637         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
638
639         mutex_lock(&smmu_domain->init_mutex);
640         if (smmu_domain->smmu)
641                 goto out_unlock;
642
643         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
644                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
645                 smmu_domain->smmu = smmu;
646                 goto out_unlock;
647         }
648
649         /*
650          * Mapping the requested stage onto what we support is surprisingly
651          * complicated, mainly because the spec allows S1+S2 SMMUs without
652          * support for nested translation. That means we end up with the
653          * following table:
654          *
655          * Requested        Supported        Actual
656          *     S1               N              S1
657          *     S1             S1+S2            S1
658          *     S1               S2             S2
659          *     S1               S1             S1
660          *     N                N              N
661          *     N              S1+S2            S2
662          *     N                S2             S2
663          *     N                S1             S1
664          *
665          * Note that you can't actually request stage-2 mappings.
666          */
667         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
668                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
669         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
670                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
671
672         /*
673          * Choosing a suitable context format is even more fiddly. Until we
674          * grow some way for the caller to express a preference, and/or move
675          * the decision into the io-pgtable code where it arguably belongs,
676          * just aim for the closest thing to the rest of the system, and hope
677          * that the hardware isn't esoteric enough that we can't assume AArch64
678          * support to be a superset of AArch32 support...
679          */
680         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
681                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
682         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
683             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
684             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
685             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
686                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
687         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
688             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
689                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
690                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
691                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
692
693         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
694                 ret = -EINVAL;
695                 goto out_unlock;
696         }
697
698         switch (smmu_domain->stage) {
699         case ARM_SMMU_DOMAIN_S1:
700                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
701                 start = smmu->num_s2_context_banks;
702                 ias = smmu->va_size;
703                 oas = smmu->ipa_size;
704                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
705                         fmt = ARM_64_LPAE_S1;
706                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
707                         fmt = ARM_32_LPAE_S1;
708                         ias = min(ias, 32UL);
709                         oas = min(oas, 40UL);
710                 } else {
711                         fmt = ARM_V7S;
712                         ias = min(ias, 32UL);
713                         oas = min(oas, 32UL);
714                 }
715                 smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
716                 break;
717         case ARM_SMMU_DOMAIN_NESTED:
718                 /*
719                  * We will likely want to change this if/when KVM gets
720                  * involved.
721                  */
722         case ARM_SMMU_DOMAIN_S2:
723                 cfg->cbar = CBAR_TYPE_S2_TRANS;
724                 start = 0;
725                 ias = smmu->ipa_size;
726                 oas = smmu->pa_size;
727                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
728                         fmt = ARM_64_LPAE_S2;
729                 } else {
730                         fmt = ARM_32_LPAE_S2;
731                         ias = min(ias, 40UL);
732                         oas = min(oas, 40UL);
733                 }
734                 if (smmu->version == ARM_SMMU_V2)
735                         smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
736                 else
737                         smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
738                 break;
739         default:
740                 ret = -EINVAL;
741                 goto out_unlock;
742         }
743         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
744                                       smmu->num_context_banks);
745         if (ret < 0)
746                 goto out_unlock;
747
748         cfg->cbndx = ret;
749         if (smmu->version < ARM_SMMU_V2) {
750                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
751                 cfg->irptndx %= smmu->num_context_irqs;
752         } else {
753                 cfg->irptndx = cfg->cbndx;
754         }
755
756         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
757                 cfg->vmid = cfg->cbndx + 1;
758         else
759                 cfg->asid = cfg->cbndx;
760
761         smmu_domain->smmu = smmu;
762         if (smmu->impl && smmu->impl->init_context) {
763                 ret = smmu->impl->init_context(smmu_domain);
764                 if (ret)
765                         goto out_unlock;
766         }
767
768         pgtbl_cfg = (struct io_pgtable_cfg) {
769                 .pgsize_bitmap  = smmu->pgsize_bitmap,
770                 .ias            = ias,
771                 .oas            = oas,
772                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
773                 .tlb            = &smmu_domain->flush_ops->tlb,
774                 .iommu_dev      = smmu->dev,
775         };
776
777         if (smmu_domain->non_strict)
778                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
779
780         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
781         if (!pgtbl_ops) {
782                 ret = -ENOMEM;
783                 goto out_clear_smmu;
784         }
785
786         /* Update the domain's page sizes to reflect the page table format */
787         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
788         domain->geometry.aperture_end = (1UL << ias) - 1;
789         domain->geometry.force_aperture = true;
790
791         /* Initialise the context bank with our page table cfg */
792         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
793         arm_smmu_write_context_bank(smmu, cfg->cbndx);
794
795         /*
796          * Request context fault interrupt. Do this last to avoid the
797          * handler seeing a half-initialised domain state.
798          */
799         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
800         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
801                                IRQF_SHARED, "arm-smmu-context-fault", domain);
802         if (ret < 0) {
803                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
804                         cfg->irptndx, irq);
805                 cfg->irptndx = INVALID_IRPTNDX;
806         }
807
808         mutex_unlock(&smmu_domain->init_mutex);
809
810         /* Publish page table ops for map/unmap */
811         smmu_domain->pgtbl_ops = pgtbl_ops;
812         return 0;
813
814 out_clear_smmu:
815         smmu_domain->smmu = NULL;
816 out_unlock:
817         mutex_unlock(&smmu_domain->init_mutex);
818         return ret;
819 }
820
821 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
822 {
823         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
824         struct arm_smmu_device *smmu = smmu_domain->smmu;
825         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
826         int ret, irq;
827
828         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
829                 return;
830
831         ret = arm_smmu_rpm_get(smmu);
832         if (ret < 0)
833                 return;
834
835         /*
836          * Disable the context bank and free the page tables before freeing
837          * it.
838          */
839         smmu->cbs[cfg->cbndx].cfg = NULL;
840         arm_smmu_write_context_bank(smmu, cfg->cbndx);
841
842         if (cfg->irptndx != INVALID_IRPTNDX) {
843                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
844                 devm_free_irq(smmu->dev, irq, domain);
845         }
846
847         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
848         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
849
850         arm_smmu_rpm_put(smmu);
851 }
852
853 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
854 {
855         struct arm_smmu_domain *smmu_domain;
856
857         if (type != IOMMU_DOMAIN_UNMANAGED &&
858             type != IOMMU_DOMAIN_DMA &&
859             type != IOMMU_DOMAIN_IDENTITY)
860                 return NULL;
861         /*
862          * Allocate the domain and initialise some of its data structures.
863          * We can't really do anything meaningful until we've added a
864          * master.
865          */
866         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
867         if (!smmu_domain)
868                 return NULL;
869
870         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
871             iommu_get_dma_cookie(&smmu_domain->domain))) {
872                 kfree(smmu_domain);
873                 return NULL;
874         }
875
876         mutex_init(&smmu_domain->init_mutex);
877         spin_lock_init(&smmu_domain->cb_lock);
878
879         return &smmu_domain->domain;
880 }
881
882 static void arm_smmu_domain_free(struct iommu_domain *domain)
883 {
884         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
885
886         /*
887          * Free the domain resources. We assume that all devices have
888          * already been detached.
889          */
890         iommu_put_dma_cookie(domain);
891         arm_smmu_destroy_domain_context(domain);
892         kfree(smmu_domain);
893 }
894
895 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
896 {
897         struct arm_smmu_smr *smr = smmu->smrs + idx;
898         u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
899
900         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
901                 reg |= SMR_VALID;
902         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
903 }
904
905 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
906 {
907         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
908         u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
909                   FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
910                   FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
911
912         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
913             smmu->smrs[idx].valid)
914                 reg |= S2CR_EXIDVALID;
915         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
916 }
917
918 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
919 {
920         arm_smmu_write_s2cr(smmu, idx);
921         if (smmu->smrs)
922                 arm_smmu_write_smr(smmu, idx);
923 }
924
925 /*
926  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
927  * should be called after sCR0 is written.
928  */
929 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
930 {
931         u32 smr;
932
933         if (!smmu->smrs)
934                 return;
935
936         /*
937          * SMR.ID bits may not be preserved if the corresponding MASK
938          * bits are set, so check each one separately. We can reject
939          * masters later if they try to claim IDs outside these masks.
940          */
941         smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
942         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
943         smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
944         smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
945
946         smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
947         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
948         smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
949         smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
950 }
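/*
 * The probing above relies on unimplemented SMR bits reading back as zero:
 * writing the candidate streamid_mask into SMR 0's ID field and reading it
 * back narrows the mask to what the hardware actually implements (e.g., a
 * hypothetical SMMU with 10 stream ID bits would return 0x3ff), and the
 * same trick is then applied to the MASK field.
 */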
951
952 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
953 {
954         struct arm_smmu_smr *smrs = smmu->smrs;
955         int i, free_idx = -ENOSPC;
956
957         /* Stream indexing is blissfully easy */
958         if (!smrs)
959                 return id;
960
961         /* Validating SMRs is... less so */
962         for (i = 0; i < smmu->num_mapping_groups; ++i) {
963                 if (!smrs[i].valid) {
964                         /*
965                          * Note the first free entry we come across, which
966                          * we'll claim in the end if nothing else matches.
967                          */
968                         if (free_idx < 0)
969                                 free_idx = i;
970                         continue;
971                 }
972                 /*
973                  * If the new entry is _entirely_ matched by an existing entry,
974                  * then reuse that, with the guarantee that there also cannot
975                  * be any subsequent conflicting entries. In normal use we'd
976                  * expect simply identical entries for this case, but there's
977                  * no harm in accommodating the generalisation.
978                  */
979                 if ((mask & smrs[i].mask) == mask &&
980                     !((id ^ smrs[i].id) & ~smrs[i].mask))
981                         return i;
982                 /*
983                  * If the new entry has any other overlap with an existing one,
984                  * though, then there always exists at least one stream ID
985                  * which would cause a conflict, and we can't allow that risk.
986                  */
987                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
988                         return -EINVAL;
989         }
990
991         return free_idx;
992 }
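/*
 * A worked example of the matching rules above, with hypothetical values.
 * Suppose smrs[i] = { .id = 0x400, .mask = 0x0ff }, i.e. it already matches
 * stream IDs 0x400-0x4ff:
 *
 *  - a new entry id = 0x412, mask = 0x00f is entirely covered (0x00f is a
 *    subset of 0x0ff, and 0x412 ^ 0x400 falls within 0x0ff), so index i is
 *    reused;
 *  - a new entry id = 0x4f0, mask = 0xf00 is not covered, but both entries
 *    would match stream ID 0x4f0, so -EINVAL is returned.
 */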
993
994 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
995 {
996         if (--smmu->s2crs[idx].count)
997                 return false;
998
999         smmu->s2crs[idx] = s2cr_init_val;
1000         if (smmu->smrs)
1001                 smmu->smrs[idx].valid = false;
1002
1003         return true;
1004 }
1005
1006 static int arm_smmu_master_alloc_smes(struct device *dev)
1007 {
1008         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1009         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1010         struct arm_smmu_device *smmu = cfg->smmu;
1011         struct arm_smmu_smr *smrs = smmu->smrs;
1012         struct iommu_group *group;
1013         int i, idx, ret;
1014
1015         mutex_lock(&smmu->stream_map_mutex);
1016         /* Figure out a viable stream map entry allocation */
1017         for_each_cfg_sme(fwspec, i, idx) {
1018                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1019                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1020
1021                 if (idx != INVALID_SMENDX) {
1022                         ret = -EEXIST;
1023                         goto out_err;
1024                 }
1025
1026                 ret = arm_smmu_find_sme(smmu, sid, mask);
1027                 if (ret < 0)
1028                         goto out_err;
1029
1030                 idx = ret;
1031                 if (smrs && smmu->s2crs[idx].count == 0) {
1032                         smrs[idx].id = sid;
1033                         smrs[idx].mask = mask;
1034                         smrs[idx].valid = true;
1035                 }
1036                 smmu->s2crs[idx].count++;
1037                 cfg->smendx[i] = (s16)idx;
1038         }
1039
1040         group = iommu_group_get_for_dev(dev);
1041         if (!group)
1042                 group = ERR_PTR(-ENOMEM);
1043         if (IS_ERR(group)) {
1044                 ret = PTR_ERR(group);
1045                 goto out_err;
1046         }
1047         iommu_group_put(group);
1048
1049         /* It worked! Now, poke the actual hardware */
1050         for_each_cfg_sme(fwspec, i, idx) {
1051                 arm_smmu_write_sme(smmu, idx);
1052                 smmu->s2crs[idx].group = group;
1053         }
1054
1055         mutex_unlock(&smmu->stream_map_mutex);
1056         return 0;
1057
1058 out_err:
1059         while (i--) {
1060                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1061                 cfg->smendx[i] = INVALID_SMENDX;
1062         }
1063         mutex_unlock(&smmu->stream_map_mutex);
1064         return ret;
1065 }
1066
1067 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1068 {
1069         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1070         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1071         int i, idx;
1072
1073         mutex_lock(&smmu->stream_map_mutex);
1074         for_each_cfg_sme(fwspec, i, idx) {
1075                 if (arm_smmu_free_sme(smmu, idx))
1076                         arm_smmu_write_sme(smmu, idx);
1077                 cfg->smendx[i] = INVALID_SMENDX;
1078         }
1079         mutex_unlock(&smmu->stream_map_mutex);
1080 }
1081
1082 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1083                                       struct iommu_fwspec *fwspec)
1084 {
1085         struct arm_smmu_device *smmu = smmu_domain->smmu;
1086         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1087         u8 cbndx = smmu_domain->cfg.cbndx;
1088         enum arm_smmu_s2cr_type type;
1089         int i, idx;
1090
1091         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1092                 type = S2CR_TYPE_BYPASS;
1093         else
1094                 type = S2CR_TYPE_TRANS;
1095
1096         for_each_cfg_sme(fwspec, i, idx) {
1097                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1098                         continue;
1099
1100                 s2cr[idx].type = type;
1101                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1102                 s2cr[idx].cbndx = cbndx;
1103                 arm_smmu_write_s2cr(smmu, idx);
1104         }
1105         return 0;
1106 }
1107
1108 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1109 {
1110         int ret;
1111         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1112         struct arm_smmu_device *smmu;
1113         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1114
1115         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1116                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1117                 return -ENXIO;
1118         }
1119
1120         /*
1121          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1122          * domains between of_xlate() and add_device() - we have no way to cope
1123          * with that, so until ARM gets converted to rely on groups and default
1124          * domains, just say no (but more politely than by dereferencing NULL).
1125          * This should be at least a WARN_ON once that's sorted.
1126          */
1127         if (!fwspec->iommu_priv)
1128                 return -ENODEV;
1129
1130         smmu = fwspec_smmu(fwspec);
1131
1132         ret = arm_smmu_rpm_get(smmu);
1133         if (ret < 0)
1134                 return ret;
1135
1136         /* Ensure that the domain is finalised */
1137         ret = arm_smmu_init_domain_context(domain, smmu);
1138         if (ret < 0)
1139                 goto rpm_put;
1140
1141         /*
1142          * Sanity check the domain. We don't support domains across
1143          * different SMMUs.
1144          */
1145         if (smmu_domain->smmu != smmu) {
1146                 dev_err(dev,
1147                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1148                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1149                 ret = -EINVAL;
1150                 goto rpm_put;
1151         }
1152
1153         /* Looks ok, so add the device to the domain */
1154         ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1155
1156 rpm_put:
1157         arm_smmu_rpm_put(smmu);
1158         return ret;
1159 }
1160
1161 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1162                         phys_addr_t paddr, size_t size, int prot)
1163 {
1164         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1165         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1166         int ret;
1167
1168         if (!ops)
1169                 return -ENODEV;
1170
1171         arm_smmu_rpm_get(smmu);
1172         ret = ops->map(ops, iova, paddr, size, prot);
1173         arm_smmu_rpm_put(smmu);
1174
1175         return ret;
1176 }
1177
1178 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1179                              size_t size, struct iommu_iotlb_gather *gather)
1180 {
1181         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1182         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1183         size_t ret;
1184
1185         if (!ops)
1186                 return 0;
1187
1188         arm_smmu_rpm_get(smmu);
1189         ret = ops->unmap(ops, iova, size, gather);
1190         arm_smmu_rpm_put(smmu);
1191
1192         return ret;
1193 }
1194
1195 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1196 {
1197         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1198         struct arm_smmu_device *smmu = smmu_domain->smmu;
1199
1200         if (smmu_domain->flush_ops) {
1201                 arm_smmu_rpm_get(smmu);
1202                 smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
1203                 arm_smmu_rpm_put(smmu);
1204         }
1205 }
1206
1207 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1208                                 struct iommu_iotlb_gather *gather)
1209 {
1210         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1211         struct arm_smmu_device *smmu = smmu_domain->smmu;
1212
1213         if (smmu_domain->flush_ops) {
1214                 arm_smmu_rpm_get(smmu);
1215                 smmu_domain->flush_ops->tlb_sync(smmu_domain);
1216                 arm_smmu_rpm_put(smmu);
1217         }
1218 }
1219
1220 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1221                                               dma_addr_t iova)
1222 {
1223         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1224         struct arm_smmu_device *smmu = smmu_domain->smmu;
1225         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1226         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1227         struct device *dev = smmu->dev;
1228         void __iomem *reg;
1229         u32 tmp;
1230         u64 phys;
1231         unsigned long va, flags;
1232         int ret, idx = cfg->cbndx;
1233
1234         ret = arm_smmu_rpm_get(smmu);
1235         if (ret < 0)
1236                 return 0;
1237
1238         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1239         va = iova & ~0xfffUL;
1240         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1241                 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1242         else
1243                 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1244
1245         reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1246         if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
1247                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1248                 dev_err(dev,
1249                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1250                         &iova);
                arm_smmu_rpm_put(smmu); /* balance the arm_smmu_rpm_get() above */
1251                 return ops->iova_to_phys(ops, iova);
1252         }
1253
1254         phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1255         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1256         if (phys & CB_PAR_F) {
1257                 dev_err(dev, "translation fault!\n");
1258                 dev_err(dev, "PAR = 0x%llx\n", phys);
                arm_smmu_rpm_put(smmu); /* balance the arm_smmu_rpm_get() above */
1259                 return 0;
1260         }
1261
1262         arm_smmu_rpm_put(smmu);
1263
1264         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1265 }
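/*
 * arm_smmu_iova_to_phys_hard() asks the hardware to translate on our
 * behalf: it writes the page-aligned VA to ATS1PR, polls ATSR until the
 * operation completes, then reads the result (or a fault indication) from
 * PAR. The software page-table walk is only used as a fallback when the
 * hardware translation times out.
 */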
1266
1267 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1268                                         dma_addr_t iova)
1269 {
1270         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1271         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1272
1273         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1274                 return iova;
1275
1276         if (!ops)
1277                 return 0;
1278
1279         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1280                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1281                 return arm_smmu_iova_to_phys_hard(domain, iova);
1282
1283         return ops->iova_to_phys(ops, iova);
1284 }
1285
1286 static bool arm_smmu_capable(enum iommu_cap cap)
1287 {
1288         switch (cap) {
1289         case IOMMU_CAP_CACHE_COHERENCY:
1290                 /*
1291                  * Return true here as the SMMU can always send out coherent
1292                  * requests.
1293                  */
1294                 return true;
1295         case IOMMU_CAP_NOEXEC:
1296                 return true;
1297         default:
1298                 return false;
1299         }
1300 }
1301
1302 static int arm_smmu_match_node(struct device *dev, const void *data)
1303 {
1304         return dev->fwnode == data;
1305 }
1306
1307 static
1308 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1309 {
1310         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1311                                                 fwnode, arm_smmu_match_node);
1312         put_device(dev);
1313         return dev ? dev_get_drvdata(dev) : NULL;
1314 }
1315
1316 static int arm_smmu_add_device(struct device *dev)
1317 {
1318         struct arm_smmu_device *smmu;
1319         struct arm_smmu_master_cfg *cfg;
1320         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1321         int i, ret;
1322
1323         if (using_legacy_binding) {
1324                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1325
1326                 /*
1327                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1328                  * will allocate/initialise a new one. Thus we need to update fwspec for
1329                  * later use.
1330                  */
1331                 fwspec = dev_iommu_fwspec_get(dev);
1332                 if (ret)
1333                         goto out_free;
1334         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1335                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1336         } else {
1337                 return -ENODEV;
1338         }
1339
1340         ret = -EINVAL;
1341         for (i = 0; i < fwspec->num_ids; i++) {
1342                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1343                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1344
1345                 if (sid & ~smmu->streamid_mask) {
1346                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1347                                 sid, smmu->streamid_mask);
1348                         goto out_free;
1349                 }
1350                 if (mask & ~smmu->smr_mask_mask) {
1351                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1352                                 mask, smmu->smr_mask_mask);
1353                         goto out_free;
1354                 }
1355         }
1356
1357         ret = -ENOMEM;
1358         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1359                       GFP_KERNEL);
1360         if (!cfg)
1361                 goto out_free;
1362
1363         cfg->smmu = smmu;
1364         fwspec->iommu_priv = cfg;
1365         while (i--)
1366                 cfg->smendx[i] = INVALID_SMENDX;
1367
1368         ret = arm_smmu_rpm_get(smmu);
1369         if (ret < 0)
1370                 goto out_cfg_free;
1371
1372         ret = arm_smmu_master_alloc_smes(dev);
1373         arm_smmu_rpm_put(smmu);
1374
1375         if (ret)
1376                 goto out_cfg_free;
1377
1378         iommu_device_link(&smmu->iommu, dev);
1379
1380         device_link_add(dev, smmu->dev,
1381                         DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1382
1383         return 0;
1384
1385 out_cfg_free:
1386         kfree(cfg);
1387 out_free:
1388         iommu_fwspec_free(dev);
1389         return ret;
1390 }
1391
1392 static void arm_smmu_remove_device(struct device *dev)
1393 {
1394         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1395         struct arm_smmu_master_cfg *cfg;
1396         struct arm_smmu_device *smmu;
1397         int ret;
1398
1399         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1400                 return;
1401
1402         cfg  = fwspec->iommu_priv;
1403         smmu = cfg->smmu;
1404
1405         ret = arm_smmu_rpm_get(smmu);
1406         if (ret < 0)
1407                 return;
1408
1409         iommu_device_unlink(&smmu->iommu, dev);
1410         arm_smmu_master_free_smes(fwspec);
1411
1412         arm_smmu_rpm_put(smmu);
1413
1414         iommu_group_remove_device(dev);
1415         kfree(fwspec->iommu_priv);
1416         iommu_fwspec_free(dev);
1417 }
1418
1419 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1420 {
1421         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1422         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1423         struct iommu_group *group = NULL;
1424         int i, idx;
1425
1426         for_each_cfg_sme(fwspec, i, idx) {
1427                 if (group && smmu->s2crs[idx].group &&
1428                     group != smmu->s2crs[idx].group)
1429                         return ERR_PTR(-EINVAL);
1430
1431                 group = smmu->s2crs[idx].group;
1432         }
1433
1434         if (group)
1435                 return iommu_group_ref_get(group);
1436
1437         if (dev_is_pci(dev))
1438                 group = pci_device_group(dev);
1439         else if (dev_is_fsl_mc(dev))
1440                 group = fsl_mc_device_group(dev);
1441         else
1442                 group = generic_device_group(dev);
1443
1444         return group;
1445 }
1446
1447 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1448                                     enum iommu_attr attr, void *data)
1449 {
1450         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1451
1452         switch (domain->type) {
1453         case IOMMU_DOMAIN_UNMANAGED:
1454                 switch (attr) {
1455                 case DOMAIN_ATTR_NESTING:
1456                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1457                         return 0;
1458                 default:
1459                         return -ENODEV;
1460                 }
1461                 break;
1462         case IOMMU_DOMAIN_DMA:
1463                 switch (attr) {
1464                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1465                         *(int *)data = smmu_domain->non_strict;
1466                         return 0;
1467                 default:
1468                         return -ENODEV;
1469                 }
1470                 break;
1471         default:
1472                 return -EINVAL;
1473         }
1474 }
1475
1476 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1477                                     enum iommu_attr attr, void *data)
1478 {
1479         int ret = 0;
1480         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1481
1482         mutex_lock(&smmu_domain->init_mutex);
1483
1484         switch (domain->type) {
1485         case IOMMU_DOMAIN_UNMANAGED:
1486                 switch (attr) {
1487                 case DOMAIN_ATTR_NESTING:
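                             /* The stage cannot be changed once the domain is attached to an SMMU */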
1488                         if (smmu_domain->smmu) {
1489                                 ret = -EPERM;
1490                                 goto out_unlock;
1491                         }
1492
1493                         if (*(int *)data)
1494                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1495                         else
1496                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1497                         break;
1498                 default:
1499                         ret = -ENODEV;
1500                 }
1501                 break;
1502         case IOMMU_DOMAIN_DMA:
1503                 switch (attr) {
1504                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1505                         smmu_domain->non_strict = *(int *)data;
1506                         break;
1507                 default:
1508                         ret = -ENODEV;
1509                 }
1510                 break;
1511         default:
1512                 ret = -EINVAL;
1513         }
1514 out_unlock:
1515         mutex_unlock(&smmu_domain->init_mutex);
1516         return ret;
1517 }
1518
1519 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1520 {
1521         u32 mask, fwid = 0;
1522
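             /* Combine the stream ID and optional SMR mask from the specifier into one firmware ID */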
1523         if (args->args_count > 0)
1524                 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1525
1526         if (args->args_count > 1)
1527                 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1528         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1529                 fwid |= FIELD_PREP(SMR_MASK, mask);
1530
1531         return iommu_fwspec_add_ids(dev, &fwid, 1);
1532 }
1533
1534 static void arm_smmu_get_resv_regions(struct device *dev,
1535                                       struct list_head *head)
1536 {
1537         struct iommu_resv_region *region;
1538         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1539
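             /* Reserve a software MSI IOVA window in which MSI doorbells can be mapped */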
1540         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1541                                          prot, IOMMU_RESV_SW_MSI);
1542         if (!region)
1543                 return;
1544
1545         list_add_tail(&region->list, head);
1546
1547         iommu_dma_get_resv_regions(dev, head);
1548 }
1549
1550 static void arm_smmu_put_resv_regions(struct device *dev,
1551                                       struct list_head *head)
1552 {
1553         struct iommu_resv_region *entry, *next;
1554
1555         list_for_each_entry_safe(entry, next, head, list)
1556                 kfree(entry);
1557 }
1558
1559 static struct iommu_ops arm_smmu_ops = {
1560         .capable                = arm_smmu_capable,
1561         .domain_alloc           = arm_smmu_domain_alloc,
1562         .domain_free            = arm_smmu_domain_free,
1563         .attach_dev             = arm_smmu_attach_dev,
1564         .map                    = arm_smmu_map,
1565         .unmap                  = arm_smmu_unmap,
1566         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
1567         .iotlb_sync             = arm_smmu_iotlb_sync,
1568         .iova_to_phys           = arm_smmu_iova_to_phys,
1569         .add_device             = arm_smmu_add_device,
1570         .remove_device          = arm_smmu_remove_device,
1571         .device_group           = arm_smmu_device_group,
1572         .domain_get_attr        = arm_smmu_domain_get_attr,
1573         .domain_set_attr        = arm_smmu_domain_set_attr,
1574         .of_xlate               = arm_smmu_of_xlate,
1575         .get_resv_regions       = arm_smmu_get_resv_regions,
1576         .put_resv_regions       = arm_smmu_put_resv_regions,
1577         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1578 };
1579
1580 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1581 {
1582         int i;
1583         u32 reg;
1584
1585         /* clear global FSR */
1586         reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1587         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1588
1589         /*
1590          * Reset stream mapping groups: Initial values mark all SMRn as
1591          * invalid and all S2CRn as bypass unless overridden.
1592          */
1593         for (i = 0; i < smmu->num_mapping_groups; ++i)
1594                 arm_smmu_write_sme(smmu, i);
1595
1596         /* Make sure all context banks are disabled and clear CB_FSR */
1597         for (i = 0; i < smmu->num_context_banks; ++i) {
1598                 arm_smmu_write_context_bank(smmu, i);
1599                 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
1600         }
1601
1602         /* Invalidate the TLB, just in case */
1603         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1604         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1605
1606         reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1607
1608         /* Enable fault reporting */
1609         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1610
1611         /* Disable TLB broadcasting. */
1612         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1613
1614         /* Enable client access, handling unmatched streams as appropriate */
1615         reg &= ~sCR0_CLIENTPD;
1616         if (disable_bypass)
1617                 reg |= sCR0_USFCFG;
1618         else
1619                 reg &= ~sCR0_USFCFG;
1620
1621         /* Disable forced broadcasting */
1622         reg &= ~sCR0_FB;
1623
1624         /* Don't upgrade barriers */
1625         reg &= ~(sCR0_BSU);
1626
1627         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1628                 reg |= sCR0_VMID16EN;
1629
1630         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1631                 reg |= sCR0_EXIDENABLE;
1632
1633         if (smmu->impl && smmu->impl->reset)
1634                 smmu->impl->reset(smmu);
1635
1636         /* Push the button */
1637         arm_smmu_tlb_sync_global(smmu);
1638         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1639 }
1640
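     /* Decode the address size fields (e.g. IAS/OAS/UBS) from the ID registers into bits */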
1641 static int arm_smmu_id_size_to_bits(int size)
1642 {
1643         switch (size) {
1644         case 0:
1645                 return 32;
1646         case 1:
1647                 return 36;
1648         case 2:
1649                 return 40;
1650         case 3:
1651                 return 42;
1652         case 4:
1653                 return 44;
1654         case 5:
1655         default:
1656                 return 48;
1657         }
1658 }
1659
1660 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1661 {
1662         unsigned int size;
1663         u32 id;
1664         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1665         int i;
1666
1667         dev_notice(smmu->dev, "probing hardware configuration...\n");
1668         dev_notice(smmu->dev, "SMMUv%d with:\n",
1669                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1670
1671         /* ID0 */
1672         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1673
1674         /* Restrict available stages based on module parameter */
1675         if (force_stage == 1)
1676                 id &= ~(ID0_S2TS | ID0_NTS);
1677         else if (force_stage == 2)
1678                 id &= ~(ID0_S1TS | ID0_NTS);
1679
1680         if (id & ID0_S1TS) {
1681                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1682                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1683         }
1684
1685         if (id & ID0_S2TS) {
1686                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1687                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1688         }
1689
1690         if (id & ID0_NTS) {
1691                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1692                 dev_notice(smmu->dev, "\tnested translation\n");
1693         }
1694
1695         if (!(smmu->features &
1696                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1697                 dev_err(smmu->dev, "\tno translation support!\n");
1698                 return -ENODEV;
1699         }
1700
1701         if ((id & ID0_S1TS) &&
1702                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1703                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1704                 dev_notice(smmu->dev, "\taddress translation ops\n");
1705         }
1706
1707         /*
1708          * In order for DMA API calls to work properly, we must defer to what
1709          * the FW says about coherency, regardless of what the hardware claims.
1710          * Fortunately, this also opens up a workaround for systems where the
1711          * ID register value has ended up configured incorrectly.
1712          */
1713         cttw_reg = !!(id & ID0_CTTW);
1714         if (cttw_fw || cttw_reg)
1715                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1716                            cttw_fw ? "" : "non-");
1717         if (cttw_fw != cttw_reg)
1718                 dev_notice(smmu->dev,
1719                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1720
1721         /* Max. number of entries we have for stream matching/indexing */
1722         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1723                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1724                 size = 1 << 16;
1725         } else {
1726                 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1727         }
1728         smmu->streamid_mask = size - 1;
1729         if (id & ID0_SMS) {
1730                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1731                 size = FIELD_GET(ID0_NUMSMRG, id);
1732                 if (size == 0) {
1733                         dev_err(smmu->dev,
1734                                 "stream-matching supported, but no SMRs present!\n");
1735                         return -ENODEV;
1736                 }
1737
1738                 /* Zero-initialised to mark as invalid */
1739                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1740                                           GFP_KERNEL);
1741                 if (!smmu->smrs)
1742                         return -ENOMEM;
1743
1744                 dev_notice(smmu->dev,
1745                            "\tstream matching with %u register groups\n", size);
1746         }
1747         /* s2cr->type == 0 means translation, so initialise explicitly */
1748         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1749                                          GFP_KERNEL);
1750         if (!smmu->s2crs)
1751                 return -ENOMEM;
1752         for (i = 0; i < size; i++)
1753                 smmu->s2crs[i] = s2cr_init_val;
1754
1755         smmu->num_mapping_groups = size;
1756         mutex_init(&smmu->stream_map_mutex);
1757         spin_lock_init(&smmu->global_sync_lock);
1758
1759         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1760                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1761                 if (!(id & ID0_PTFS_NO_AARCH32S))
1762                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1763         }
1764
1765         /* ID1 */
1766         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1767         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1768
1769         /* Check for size mismatch of SMMU address space from mapped region */
1770         size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1771         if (smmu->numpage != 2 * size << smmu->pgshift)
1772                 dev_warn(smmu->dev,
1773                         "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1774                         2 * size << smmu->pgshift, smmu->numpage);
1775         /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1776         smmu->numpage = size;
1777
1778         smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1779         smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1780         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1781                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1782                 return -ENODEV;
1783         }
1784         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1785                    smmu->num_context_banks, smmu->num_s2_context_banks);
1786         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1787                                  sizeof(*smmu->cbs), GFP_KERNEL);
1788         if (!smmu->cbs)
1789                 return -ENOMEM;
1790
1791         /* ID2 */
1792         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1793         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
1794         smmu->ipa_size = size;
1795
1796         /* The output mask is also applied for bypass */
1797         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
1798         smmu->pa_size = size;
1799
1800         if (id & ID2_VMID16)
1801                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1802
1803         /*
1804          * What the page table walker can address actually depends on which
1805          * descriptor format is in use, but since a) we don't know that yet,
1806          * and b) it can vary per context bank, this will have to do...
1807          */
1808         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1809                 dev_warn(smmu->dev,
1810                          "failed to set DMA mask for table walker\n");
1811
1812         if (smmu->version < ARM_SMMU_V2) {
1813                 smmu->va_size = smmu->ipa_size;
1814                 if (smmu->version == ARM_SMMU_V1_64K)
1815                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1816         } else {
1817                 size = FIELD_GET(ID2_UBS, id);
1818                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1819                 if (id & ID2_PTFS_4K)
1820                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1821                 if (id & ID2_PTFS_16K)
1822                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1823                 if (id & ID2_PTFS_64K)
1824                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1825         }
1826
1827         /* Now we've corralled the various formats, what'll it do? */
1828         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1829                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1830         if (smmu->features &
1831             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1832                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1833         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1834                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1835         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1836                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1837
1838         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1839                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1840         else
1841                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1842         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1843                    smmu->pgsize_bitmap);
1844
1845
1846         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1847                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1848                            smmu->va_size, smmu->ipa_size);
1849
1850         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1851                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1852                            smmu->ipa_size, smmu->pa_size);
1853
1854         if (smmu->impl && smmu->impl->cfg_probe)
1855                 return smmu->impl->cfg_probe(smmu);
1856
1857         return 0;
1858 }
1859
1860 struct arm_smmu_match_data {
1861         enum arm_smmu_arch_version version;
1862         enum arm_smmu_implementation model;
1863 };
1864
1865 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1866 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1867
1868 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1869 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1870 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1871 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1872 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1873 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1874
1875 static const struct of_device_id arm_smmu_of_match[] = {
1876         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1877         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1878         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1879         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1880         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1881         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1882         { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1883         { },
1884 };
1885
1886 #ifdef CONFIG_ACPI
1887 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1888 {
1889         int ret = 0;
1890
1891         switch (model) {
1892         case ACPI_IORT_SMMU_V1:
1893         case ACPI_IORT_SMMU_CORELINK_MMU400:
1894                 smmu->version = ARM_SMMU_V1;
1895                 smmu->model = GENERIC_SMMU;
1896                 break;
1897         case ACPI_IORT_SMMU_CORELINK_MMU401:
1898                 smmu->version = ARM_SMMU_V1_64K;
1899                 smmu->model = GENERIC_SMMU;
1900                 break;
1901         case ACPI_IORT_SMMU_V2:
1902                 smmu->version = ARM_SMMU_V2;
1903                 smmu->model = GENERIC_SMMU;
1904                 break;
1905         case ACPI_IORT_SMMU_CORELINK_MMU500:
1906                 smmu->version = ARM_SMMU_V2;
1907                 smmu->model = ARM_MMU500;
1908                 break;
1909         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1910                 smmu->version = ARM_SMMU_V2;
1911                 smmu->model = CAVIUM_SMMUV2;
1912                 break;
1913         default:
1914                 ret = -ENODEV;
1915         }
1916
1917         return ret;
1918 }
1919
1920 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1921                                       struct arm_smmu_device *smmu)
1922 {
1923         struct device *dev = smmu->dev;
1924         struct acpi_iort_node *node =
1925                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1926         struct acpi_iort_smmu *iort_smmu;
1927         int ret;
1928
1929         /* Retrieve SMMU1/2 specific data */
1930         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1931
1932         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1933         if (ret < 0)
1934                 return ret;
1935
1936         /* Ignore the configuration access interrupt */
1937         smmu->num_global_irqs = 1;
1938
1939         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1940                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1941
1942         return 0;
1943 }
1944 #else
1945 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1946                                              struct arm_smmu_device *smmu)
1947 {
1948         return -ENODEV;
1949 }
1950 #endif
1951
1952 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1953                                     struct arm_smmu_device *smmu)
1954 {
1955         const struct arm_smmu_match_data *data;
1956         struct device *dev = &pdev->dev;
1957         bool legacy_binding;
1958
1959         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1960                                  &smmu->num_global_irqs)) {
1961                 dev_err(dev, "missing #global-interrupts property\n");
1962                 return -ENODEV;
1963         }
1964
1965         data = of_device_get_match_data(dev);
1966         smmu->version = data->version;
1967         smmu->model = data->model;
1968
1969         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
1970         if (legacy_binding && !using_generic_binding) {
1971                 if (!using_legacy_binding)
1972                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
1973                 using_legacy_binding = true;
1974         } else if (!legacy_binding && !using_legacy_binding) {
1975                 using_generic_binding = true;
1976         } else {
1977                 dev_err(dev, "not probing due to mismatched DT properties\n");
1978                 return -ENODEV;
1979         }
1980
1981         if (of_dma_is_coherent(dev->of_node))
1982                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1983
1984         return 0;
1985 }
1986
1987 static void arm_smmu_bus_init(void)
1988 {
1989         /* Oh, for a proper bus abstraction */
1990         if (!iommu_present(&platform_bus_type))
1991                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
1992 #ifdef CONFIG_ARM_AMBA
1993         if (!iommu_present(&amba_bustype))
1994                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
1995 #endif
1996 #ifdef CONFIG_PCI
1997         if (!iommu_present(&pci_bus_type)) {
1998                 pci_request_acs();
1999                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2000         }
2001 #endif
2002 #ifdef CONFIG_FSL_MC_BUS
2003         if (!iommu_present(&fsl_mc_bus_type))
2004                 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2005 #endif
2006 }
2007
2008 static int arm_smmu_device_probe(struct platform_device *pdev)
2009 {
2010         struct resource *res;
2011         resource_size_t ioaddr;
2012         struct arm_smmu_device *smmu;
2013         struct device *dev = &pdev->dev;
2014         int num_irqs, i, err;
2015
2016         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2017         if (!smmu) {
2018                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2019                 return -ENOMEM;
2020         }
2021         smmu->dev = dev;
2022
2023         if (dev->of_node)
2024                 err = arm_smmu_device_dt_probe(pdev, smmu);
2025         else
2026                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2027
2028         if (err)
2029                 return err;
2030
2031         smmu = arm_smmu_impl_init(smmu);
2032         if (IS_ERR(smmu))
2033                 return PTR_ERR(smmu);
2034
2035         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2036         smmu->base = devm_ioremap_resource(dev, res);
2037         if (IS_ERR(smmu->base))
2038                 return PTR_ERR(smmu->base);
2039         ioaddr = res->start;
2040         /*
2041          * The resource size should effectively match the value of SMMU_TOP;
2042          * stash that temporarily until we know PAGESIZE to validate it with.
2043          */
2044         smmu->numpage = resource_size(res);
2045
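             /* Count IRQ resources; those beyond the global interrupts serve context banks */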
2046         num_irqs = 0;
2047         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2048                 num_irqs++;
2049                 if (num_irqs > smmu->num_global_irqs)
2050                         smmu->num_context_irqs++;
2051         }
2052
2053         if (!smmu->num_context_irqs) {
2054                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2055                         num_irqs, smmu->num_global_irqs + 1);
2056                 return -ENODEV;
2057         }
2058
2059         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2060                                   GFP_KERNEL);
2061         if (!smmu->irqs) {
2062                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2063                 return -ENOMEM;
2064         }
2065
2066         for (i = 0; i < num_irqs; ++i) {
2067                 int irq = platform_get_irq(pdev, i);
2068
2069                 if (irq < 0) {
2070                         dev_err(dev, "failed to get irq index %d\n", i);
2071                         return -ENODEV;
2072                 }
2073                 smmu->irqs[i] = irq;
2074         }
2075
2076         err = devm_clk_bulk_get_all(dev, &smmu->clks);
2077         if (err < 0) {
2078                 dev_err(dev, "failed to get clocks %d\n", err);
2079                 return err;
2080         }
2081         smmu->num_clks = err;
2082
2083         err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2084         if (err)
2085                 return err;
2086
2087         err = arm_smmu_device_cfg_probe(smmu);
2088         if (err)
2089                 return err;
2090
2091         if (smmu->version == ARM_SMMU_V2) {
2092                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2093                         dev_err(dev,
2094                               "found only %d context irq(s) but %d required\n",
2095                               smmu->num_context_irqs, smmu->num_context_banks);
2096                         return -ENODEV;
2097                 }
2098
2099                 /* Ignore superfluous interrupts */
2100                 smmu->num_context_irqs = smmu->num_context_banks;
2101         }
2102
2103         for (i = 0; i < smmu->num_global_irqs; ++i) {
2104                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2105                                        arm_smmu_global_fault,
2106                                        IRQF_SHARED,
2107                                        "arm-smmu global fault",
2108                                        smmu);
2109                 if (err) {
2110                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2111                                 i, smmu->irqs[i]);
2112                         return err;
2113                 }
2114         }
2115
2116         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2117                                      "smmu.%pa", &ioaddr);
2118         if (err) {
2119                 dev_err(dev, "Failed to register iommu in sysfs\n");
2120                 return err;
2121         }
2122
2123         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2124         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2125
2126         err = iommu_device_register(&smmu->iommu);
2127         if (err) {
2128                 dev_err(dev, "Failed to register iommu\n");
2129                 return err;
2130         }
2131
2132         platform_set_drvdata(pdev, smmu);
2133         arm_smmu_device_reset(smmu);
2134         arm_smmu_test_smr_masks(smmu);
2135
2136         /*
2137          * We want to avoid touching dev->power.lock in fastpaths unless
2138          * it's really going to do something useful - pm_runtime_enabled()
2139          * can serve as an ideal proxy for that decision. So, conditionally
2140          * enable pm_runtime.
2141          */
2142         if (dev->pm_domain) {
2143                 pm_runtime_set_active(dev);
2144                 pm_runtime_enable(dev);
2145         }
2146
2147         /*
2148          * For ACPI and generic DT bindings, an SMMU will be probed before
2149          * any device which might need it, so we want the bus ops in place
2150          * ready to handle default domain setup as soon as any SMMU exists.
2151          */
2152         if (!using_legacy_binding)
2153                 arm_smmu_bus_init();
2154
2155         return 0;
2156 }
2157
2158 /*
2159  * With the legacy DT binding in play, though, we have no guarantees about
2160  * probe order, but then we're also not doing default domains, so we can
2161  * delay setting bus ops until we're sure every possible SMMU is ready,
2162  * and that way ensure that no add_device() calls get missed.
2163  */
2164 static int arm_smmu_legacy_bus_init(void)
2165 {
2166         if (using_legacy_binding)
2167                 arm_smmu_bus_init();
2168         return 0;
2169 }
2170 device_initcall_sync(arm_smmu_legacy_bus_init);
2171
2172 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2173 {
2174         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2175
2176         if (!smmu)
2177                 return;
2178
2179         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2180                 dev_err(&pdev->dev, "removing device with active domains!\n");
2181
2182         arm_smmu_rpm_get(smmu);
2183         /* Turn the thing off */
2184         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
2185         arm_smmu_rpm_put(smmu);
2186
2187         if (pm_runtime_enabled(smmu->dev))
2188                 pm_runtime_force_suspend(smmu->dev);
2189         else
2190                 clk_bulk_disable(smmu->num_clks, smmu->clks);
2191
2192         clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2193 }
2194
2195 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2196 {
2197         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2198         int ret;
2199
2200         ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2201         if (ret)
2202                 return ret;
2203
2204         arm_smmu_device_reset(smmu);
2205
2206         return 0;
2207 }
2208
2209 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2210 {
2211         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2212
2213         clk_bulk_disable(smmu->num_clks, smmu->clks);
2214
2215         return 0;
2216 }
2217
2218 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2219 {
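             /* If the SMMU is runtime-suspended, leave it alone; runtime resume will reset it */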
2220         if (pm_runtime_suspended(dev))
2221                 return 0;
2222
2223         return arm_smmu_runtime_resume(dev);
2224 }
2225
2226 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2227 {
2228         if (pm_runtime_suspended(dev))
2229                 return 0;
2230
2231         return arm_smmu_runtime_suspend(dev);
2232 }
2233
2234 static const struct dev_pm_ops arm_smmu_pm_ops = {
2235         SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2236         SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2237                            arm_smmu_runtime_resume, NULL)
2238 };
2239
2240 static struct platform_driver arm_smmu_driver = {
2241         .driver = {
2242                 .name                   = "arm-smmu",
2243                 .of_match_table         = of_match_ptr(arm_smmu_of_match),
2244                 .pm                     = &arm_smmu_pm_ops,
2245                 .suppress_bind_attrs    = true,
2246         },
2247         .probe  = arm_smmu_device_probe,
2248         .shutdown = arm_smmu_device_shutdown,
2249 };
2250 builtin_platform_driver(arm_smmu_driver);