drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
62 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
63 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
64
65 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
66 #define TLB_SPIN_COUNT                  10
67
68 /* Maximum number of context banks per SMMU */
69 #define ARM_SMMU_MAX_CBS                128
70
71 /* SMMU global address space */
72 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
73 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
74
75 /*
76  * SMMU global address space with conditional offset to access secure
77  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
78  * nsGFSYNR0: 0x450)
79  */
80 #define ARM_SMMU_GR0_NS(smmu)                                           \
81         ((smmu)->base +                                                 \
82                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
83                         ? 0x400 : 0))
84
85 /*
86  * Some 64-bit registers only make sense to write atomically, but in such
87  * cases all the data relevant to AArch32 formats lies within the lower word,
88  * therefore this actually makes more sense than it might first appear.
89  */
90 #ifdef CONFIG_64BIT
91 #define smmu_write_atomic_lq            writeq_relaxed
92 #else
93 #define smmu_write_atomic_lq            writel_relaxed
94 #endif
95
96 /* Translation context bank */
97 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
98
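/* IOVA window reserved for mapping MSI doorbells on behalf of devices */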
99 #define MSI_IOVA_BASE                   0x8000000
100 #define MSI_IOVA_LENGTH                 0x100000
101
102 static int force_stage;
103 module_param(force_stage, int, S_IRUGO);
104 MODULE_PARM_DESC(force_stage,
105         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
106 static bool disable_bypass;
107 module_param(disable_bypass, bool, S_IRUGO);
108 MODULE_PARM_DESC(disable_bypass,
109         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
110
111 enum arm_smmu_arch_version {
112         ARM_SMMU_V1,
113         ARM_SMMU_V1_64K,
114         ARM_SMMU_V2,
115 };
116
117 enum arm_smmu_implementation {
118         GENERIC_SMMU,
119         ARM_MMU500,
120         CAVIUM_SMMUV2,
121 };
122
123 struct arm_smmu_s2cr {
124         struct iommu_group              *group;
125         int                             count;
126         enum arm_smmu_s2cr_type         type;
127         enum arm_smmu_s2cr_privcfg      privcfg;
128         u8                              cbndx;
129 };
130
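/*
 * Reset value for an unused Stream-to-Context Register entry: either fault
 * or bypass incoming transactions, depending on the disable_bypass parameter.
 */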
131 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
132         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
133 }
134
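/*
 * Stream Match Register state: an incoming stream ID matches when it equals
 * 'id' in the bits not set in 'mask'.
 */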
135 struct arm_smmu_smr {
136         u16                             mask;
137         u16                             id;
138         bool                            valid;
139 };
140
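/*
 * Software shadow of a context bank's translation registers, filled in by
 * arm_smmu_init_context_bank() and written out by arm_smmu_write_context_bank().
 */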
141 struct arm_smmu_cb {
142         u64                             ttbr[2];
143         u32                             tcr[2];
144         u32                             mair[2];
145         struct arm_smmu_cfg             *cfg;
146 };
147
148 struct arm_smmu_master_cfg {
149         struct arm_smmu_device          *smmu;
150         s16                             smendx[];
151 };
152 #define INVALID_SMENDX                  -1
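/* Helpers to get from a device's iommu_fwspec to its SMMU and stream map entry indices */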
153 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
154 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
155 #define fwspec_smendx(fw, i) \
156         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
157 #define for_each_cfg_sme(fw, i, idx) \
158         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
159
160 struct arm_smmu_device {
161         struct device                   *dev;
162
163         void __iomem                    *base;
164         void __iomem                    *cb_base;
165         unsigned long                   pgshift;
166
167 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
168 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
169 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
170 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
171 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
172 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
173 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
174 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
177 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
179 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
180         u32                             features;
181
182 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
183         u32                             options;
184         enum arm_smmu_arch_version      version;
185         enum arm_smmu_implementation    model;
186
187         u32                             num_context_banks;
188         u32                             num_s2_context_banks;
189         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
190         struct arm_smmu_cb              *cbs;
191         atomic_t                        irptndx;
192
193         u32                             num_mapping_groups;
194         u16                             streamid_mask;
195         u16                             smr_mask_mask;
196         struct arm_smmu_smr             *smrs;
197         struct arm_smmu_s2cr            *s2crs;
198         struct mutex                    stream_map_mutex;
199
200         unsigned long                   va_size;
201         unsigned long                   ipa_size;
202         unsigned long                   pa_size;
203         unsigned long                   pgsize_bitmap;
204
205         u32                             num_global_irqs;
206         u32                             num_context_irqs;
207         unsigned int                    *irqs;
208
209         u32                             cavium_id_base; /* Specific to Cavium */
210
211         spinlock_t                      global_sync_lock;
212
213         /* IOMMU core code handle */
214         struct iommu_device             iommu;
215 };
216
217 enum arm_smmu_context_fmt {
218         ARM_SMMU_CTX_FMT_NONE,
219         ARM_SMMU_CTX_FMT_AARCH64,
220         ARM_SMMU_CTX_FMT_AARCH32_L,
221         ARM_SMMU_CTX_FMT_AARCH32_S,
222 };
223
224 struct arm_smmu_cfg {
225         u8                              cbndx;
226         u8                              irptndx;
227         union {
228                 u16                     asid;
229                 u16                     vmid;
230         };
231         u32                             cbar;
232         enum arm_smmu_context_fmt       fmt;
233 };
234 #define INVALID_IRPTNDX                 0xff
235
236 enum arm_smmu_domain_stage {
237         ARM_SMMU_DOMAIN_S1 = 0,
238         ARM_SMMU_DOMAIN_S2,
239         ARM_SMMU_DOMAIN_NESTED,
240         ARM_SMMU_DOMAIN_BYPASS,
241 };
242
243 struct arm_smmu_domain {
244         struct arm_smmu_device          *smmu;
245         struct io_pgtable_ops           *pgtbl_ops;
246         const struct iommu_gather_ops   *tlb_ops;
247         struct arm_smmu_cfg             cfg;
248         enum arm_smmu_domain_stage      stage;
249         bool                            non_strict;
250         struct mutex                    init_mutex; /* Protects smmu pointer */
251         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
252         struct iommu_domain             domain;
253 };
254
255 struct arm_smmu_option_prop {
256         u32 opt;
257         const char *prop;
258 };
259
260 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
261
262 static bool using_legacy_binding, using_generic_binding;
263
264 static struct arm_smmu_option_prop arm_smmu_options[] = {
265         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
266         { 0, NULL},
267 };
268
269 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
270 {
271         return container_of(dom, struct arm_smmu_domain, domain);
272 }
273
274 static void parse_driver_options(struct arm_smmu_device *smmu)
275 {
276         int i = 0;
277
278         do {
279                 if (of_property_read_bool(smmu->dev->of_node,
280                                                 arm_smmu_options[i].prop)) {
281                         smmu->options |= arm_smmu_options[i].opt;
282                         dev_notice(smmu->dev, "option %s\n",
283                                 arm_smmu_options[i].prop);
284                 }
285         } while (arm_smmu_options[++i].opt);
286 }
287
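/*
 * Return the OF node identifying this master in the legacy "mmu-masters"
 * binding; for PCI devices that is the node of the host controller.
 */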
288 static struct device_node *dev_get_dev_node(struct device *dev)
289 {
290         if (dev_is_pci(dev)) {
291                 struct pci_bus *bus = to_pci_dev(dev)->bus;
292
293                 while (!pci_is_root_bus(bus))
294                         bus = bus->parent;
295                 return of_node_get(bus->bridge->parent->of_node);
296         }
297
298         return of_node_get(dev->of_node);
299 }
300
301 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
302 {
303         *((__be32 *)data) = cpu_to_be32(alias);
304         return 0; /* Continue walking */
305 }
306
307 static int __find_legacy_master_phandle(struct device *dev, void *data)
308 {
309         struct of_phandle_iterator *it = *(void **)data;
310         struct device_node *np = it->node;
311         int err;
312
313         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
314                             "#stream-id-cells", 0)
315                 if (it->node == np) {
316                         *(void **)data = dev;
317                         return 1;
318                 }
319         it->node = np;
320         return err == -ENOENT ? 0 : err;
321 }
322
323 static struct platform_driver arm_smmu_driver;
324 static struct iommu_ops arm_smmu_ops;
325
326 static int arm_smmu_register_legacy_master(struct device *dev,
327                                            struct arm_smmu_device **smmu)
328 {
329         struct device *smmu_dev;
330         struct device_node *np;
331         struct of_phandle_iterator it;
332         void *data = &it;
333         u32 *sids;
334         __be32 pci_sid;
335         int err;
336
337         np = dev_get_dev_node(dev);
338         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
339                 of_node_put(np);
340                 return -ENODEV;
341         }
342
343         it.node = np;
344         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
345                                      __find_legacy_master_phandle);
346         smmu_dev = data;
347         of_node_put(np);
348         if (err == 0)
349                 return -ENODEV;
350         if (err < 0)
351                 return err;
352
353         if (dev_is_pci(dev)) {
354                 /* "mmu-masters" assumes Stream ID == Requester ID */
355                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
356                                        &pci_sid);
357                 it.cur = &pci_sid;
358                 it.cur_count = 1;
359         }
360
361         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
362                                 &arm_smmu_ops);
363         if (err)
364                 return err;
365
366         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
367         if (!sids)
368                 return -ENOMEM;
369
370         *smmu = dev_get_drvdata(smmu_dev);
371         of_phandle_iterator_args(&it, sids, it.cur_count);
372         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
373         kfree(sids);
374         return err;
375 }
376
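/* Atomically claim the first free bit of 'map' in [start, end); returns the index or -ENOSPC */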
377 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
378 {
379         int idx;
380
381         do {
382                 idx = find_next_zero_bit(map, end, start);
383                 if (idx == end)
384                         return -ENOSPC;
385         } while (test_and_set_bit(idx, map));
386
387         return idx;
388 }
389
390 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
391 {
392         clear_bit(idx, map);
393 }
394
395 /* Wait for any pending TLB invalidations to complete */
396 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
397                                 void __iomem *sync, void __iomem *status)
398 {
399         unsigned int spin_cnt, delay;
400
401         writel_relaxed(0, sync);
402         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
403                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
404                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
405                                 return;
406                         cpu_relax();
407                 }
408                 udelay(delay);
409         }
410         dev_err_ratelimited(smmu->dev,
411                             "TLB sync timed out -- SMMU may be deadlocked\n");
412 }
413
414 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
415 {
416         void __iomem *base = ARM_SMMU_GR0(smmu);
417         unsigned long flags;
418
419         spin_lock_irqsave(&smmu->global_sync_lock, flags);
420         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
421                             base + ARM_SMMU_GR0_sTLBGSTATUS);
422         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
423 }
424
425 static void arm_smmu_tlb_sync_context(void *cookie)
426 {
427         struct arm_smmu_domain *smmu_domain = cookie;
428         struct arm_smmu_device *smmu = smmu_domain->smmu;
429         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
430         unsigned long flags;
431
432         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
433         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
434                             base + ARM_SMMU_CB_TLBSTATUS);
435         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
436 }
437
438 static void arm_smmu_tlb_sync_vmid(void *cookie)
439 {
440         struct arm_smmu_domain *smmu_domain = cookie;
441
442         arm_smmu_tlb_sync_global(smmu_domain->smmu);
443 }
444
445 static void arm_smmu_tlb_inv_context_s1(void *cookie)
446 {
447         struct arm_smmu_domain *smmu_domain = cookie;
448         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
449         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
450
451         /*
452          * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
453          * cleared by the current CPU are visible to the SMMU before the TLBI.
454          */
455         writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
456         arm_smmu_tlb_sync_context(cookie);
457 }
458
459 static void arm_smmu_tlb_inv_context_s2(void *cookie)
460 {
461         struct arm_smmu_domain *smmu_domain = cookie;
462         struct arm_smmu_device *smmu = smmu_domain->smmu;
463         void __iomem *base = ARM_SMMU_GR0(smmu);
464
465         /* NOTE: see above */
466         writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
467         arm_smmu_tlb_sync_global(smmu);
468 }
469
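/*
 * Issue TLB invalidations by VA (stage 1) or IPA (stage 2) for the given
 * range without waiting for completion; callers follow up with a TLB sync.
 */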
470 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
471                                           size_t granule, bool leaf, void *cookie)
472 {
473         struct arm_smmu_domain *smmu_domain = cookie;
474         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
475         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
476         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
477
478         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
479                 wmb();
480
481         if (stage1) {
482                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
483
484                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
485                         iova &= ~0xfffUL;
486                         iova |= cfg->asid;
487                         do {
488                                 writel_relaxed(iova, reg);
489                                 iova += granule;
490                         } while (size -= granule);
491                 } else {
492                         iova >>= 12;
493                         iova |= (u64)cfg->asid << 48;
494                         do {
495                                 writeq_relaxed(iova, reg);
496                                 iova += granule >> 12;
497                         } while (size -= granule);
498                 }
499         } else {
500                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
501                               ARM_SMMU_CB_S2_TLBIIPAS2;
502                 iova >>= 12;
503                 do {
504                         smmu_write_atomic_lq(iova, reg);
505                         iova += granule >> 12;
506                 } while (size -= granule);
507         }
508 }
509
510 /*
511  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
512  * almost negligible, but the benefit of getting the first one in as far ahead
513  * of the sync as possible is significant, hence we don't just make this a
514  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
515  */
516 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
517                                          size_t granule, bool leaf, void *cookie)
518 {
519         struct arm_smmu_domain *smmu_domain = cookie;
520         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
521
522         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
523                 wmb();
524
525         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
526 }
527
528 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
529         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
530         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
531         .tlb_sync       = arm_smmu_tlb_sync_context,
532 };
533
534 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
535         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
536         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
537         .tlb_sync       = arm_smmu_tlb_sync_context,
538 };
539
540 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
541         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
542         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
543         .tlb_sync       = arm_smmu_tlb_sync_vmid,
544 };
545
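/* Context fault handler: log the faulting address and syndrome, then clear the fault status */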
546 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
547 {
548         u32 fsr, fsynr;
549         unsigned long iova;
550         struct iommu_domain *domain = dev;
551         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
552         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
553         struct arm_smmu_device *smmu = smmu_domain->smmu;
554         void __iomem *cb_base;
555
556         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
557         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
558
559         if (!(fsr & FSR_FAULT))
560                 return IRQ_NONE;
561
562         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
563         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
564
565         dev_err_ratelimited(smmu->dev,
566         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
567                             fsr, iova, fsynr, cfg->cbndx);
568
569         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
570         return IRQ_HANDLED;
571 }
572
573 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
574 {
575         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
576         struct arm_smmu_device *smmu = dev;
577         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
578
579         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
580         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
581         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
582         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
583
584         if (!gfsr)
585                 return IRQ_NONE;
586
587         dev_err_ratelimited(smmu->dev,
588                 "Unexpected global fault, this could be serious\n");
589         dev_err_ratelimited(smmu->dev,
590                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
591                 gfsr, gfsynr0, gfsynr1, gfsynr2);
592
593         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
594         return IRQ_HANDLED;
595 }
596
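/*
 * Translate the io-pgtable configuration into the software shadow of the
 * domain's context bank; the hardware is programmed later by
 * arm_smmu_write_context_bank().
 */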
597 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
598                                        struct io_pgtable_cfg *pgtbl_cfg)
599 {
600         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
601         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
602         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
603
604         cb->cfg = cfg;
605
606         /* TTBCR */
607         if (stage1) {
608                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
609                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
610                 } else {
611                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
612                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
613                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
614                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
615                                 cb->tcr[1] |= TTBCR2_AS;
616                 }
617         } else {
618                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
619         }
620
621         /* TTBRs */
622         if (stage1) {
623                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
624                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
625                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
626                 } else {
627                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
628                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
629                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
630                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
631                 }
632         } else {
633                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
634         }
635
636         /* MAIRs (stage-1 only) */
637         if (stage1) {
638                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
639                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
640                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
641                 } else {
642                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
643                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
644                 }
645         }
646 }
647
648 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
649 {
650         u32 reg;
651         bool stage1;
652         struct arm_smmu_cb *cb = &smmu->cbs[idx];
653         struct arm_smmu_cfg *cfg = cb->cfg;
654         void __iomem *cb_base, *gr1_base;
655
656         cb_base = ARM_SMMU_CB(smmu, idx);
657
658         /* Unassigned context banks only need disabling */
659         if (!cfg) {
660                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
661                 return;
662         }
663
664         gr1_base = ARM_SMMU_GR1(smmu);
665         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
666
667         /* CBA2R */
668         if (smmu->version > ARM_SMMU_V1) {
669                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
670                         reg = CBA2R_RW64_64BIT;
671                 else
672                         reg = CBA2R_RW64_32BIT;
673                 /* 16-bit VMIDs live in CBA2R */
674                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
675                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
676
677                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
678         }
679
680         /* CBAR */
681         reg = cfg->cbar;
682         if (smmu->version < ARM_SMMU_V2)
683                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
684
685         /*
686          * Use the weakest shareability/memory types, so they are
687          * overridden by the ttbcr/pte.
688          */
689         if (stage1) {
690                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
691                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
692         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
693                 /* 8-bit VMIDs live in CBAR */
694                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
695         }
696         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
697
698         /*
699          * TTBCR
700          * We must write this before the TTBRs, since it determines the
701          * access behaviour of some fields (in particular, ASID[15:8]).
702          */
703         if (stage1 && smmu->version > ARM_SMMU_V1)
704                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
705         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
706
707         /* TTBRs */
708         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
709                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
710                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
711                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
712         } else {
713                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
714                 if (stage1)
715                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
716         }
717
718         /* MAIRs (stage-1 only) */
719         if (stage1) {
720                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
721                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
722         }
723
724         /* SCTLR */
725         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
726         if (stage1)
727                 reg |= SCTLR_S1_ASIDPNE;
728         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
729                 reg |= SCTLR_E;
730
731         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
732 }
733
734 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
735                                         struct arm_smmu_device *smmu)
736 {
737         int irq, start, ret = 0;
738         unsigned long ias, oas;
739         struct io_pgtable_ops *pgtbl_ops;
740         struct io_pgtable_cfg pgtbl_cfg;
741         enum io_pgtable_fmt fmt;
742         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
743         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
744
745         mutex_lock(&smmu_domain->init_mutex);
746         if (smmu_domain->smmu)
747                 goto out_unlock;
748
749         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
750                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
751                 smmu_domain->smmu = smmu;
752                 goto out_unlock;
753         }
754
755         /*
756          * Mapping the requested stage onto what we support is surprisingly
757          * complicated, mainly because the spec allows S1+S2 SMMUs without
758          * support for nested translation. That means we end up with the
759          * following table:
760          *
761          * Requested        Supported        Actual
762          *     S1               N              S1
763          *     S1             S1+S2            S1
764          *     S1               S2             S2
765          *     S1               S1             S1
766          *     N                N              N
767          *     N              S1+S2            S2
768          *     N                S2             S2
769          *     N                S1             S1
770          *
771          * Note that you can't actually request stage-2 mappings.
772          */
773         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
774                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
775         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
776                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
777
778         /*
779          * Choosing a suitable context format is even more fiddly. Until we
780          * grow some way for the caller to express a preference, and/or move
781          * the decision into the io-pgtable code where it arguably belongs,
782          * just aim for the closest thing to the rest of the system, and hope
783          * that the hardware isn't esoteric enough that we can't assume AArch64
784          * support to be a superset of AArch32 support...
785          */
786         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
787                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
788         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
789             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
790             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
791             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
792                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
793         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
794             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
795                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
796                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
797                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
798
799         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
800                 ret = -EINVAL;
801                 goto out_unlock;
802         }
803
804         switch (smmu_domain->stage) {
805         case ARM_SMMU_DOMAIN_S1:
806                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
807                 start = smmu->num_s2_context_banks;
808                 ias = smmu->va_size;
809                 oas = smmu->ipa_size;
810                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
811                         fmt = ARM_64_LPAE_S1;
812                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
813                         fmt = ARM_32_LPAE_S1;
814                         ias = min(ias, 32UL);
815                         oas = min(oas, 40UL);
816                 } else {
817                         fmt = ARM_V7S;
818                         ias = min(ias, 32UL);
819                         oas = min(oas, 32UL);
820                 }
821                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
822                 break;
823         case ARM_SMMU_DOMAIN_NESTED:
824                 /*
825                  * We will likely want to change this if/when KVM gets
826                  * involved.
827                  */
828         case ARM_SMMU_DOMAIN_S2:
829                 cfg->cbar = CBAR_TYPE_S2_TRANS;
830                 start = 0;
831                 ias = smmu->ipa_size;
832                 oas = smmu->pa_size;
833                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
834                         fmt = ARM_64_LPAE_S2;
835                 } else {
836                         fmt = ARM_32_LPAE_S2;
837                         ias = min(ias, 40UL);
838                         oas = min(oas, 40UL);
839                 }
840                 if (smmu->version == ARM_SMMU_V2)
841                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
842                 else
843                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
844                 break;
845         default:
846                 ret = -EINVAL;
847                 goto out_unlock;
848         }
849         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
850                                       smmu->num_context_banks);
851         if (ret < 0)
852                 goto out_unlock;
853
854         cfg->cbndx = ret;
855         if (smmu->version < ARM_SMMU_V2) {
856                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
857                 cfg->irptndx %= smmu->num_context_irqs;
858         } else {
859                 cfg->irptndx = cfg->cbndx;
860         }
861
862         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
863                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
864         else
865                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
866
867         pgtbl_cfg = (struct io_pgtable_cfg) {
868                 .pgsize_bitmap  = smmu->pgsize_bitmap,
869                 .ias            = ias,
870                 .oas            = oas,
871                 .tlb            = smmu_domain->tlb_ops,
872                 .iommu_dev      = smmu->dev,
873         };
874
875         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
876                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
877
878         if (smmu_domain->non_strict)
879                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
880
881         smmu_domain->smmu = smmu;
882         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
883         if (!pgtbl_ops) {
884                 ret = -ENOMEM;
885                 goto out_clear_smmu;
886         }
887
888         /* Update the domain's page sizes to reflect the page table format */
889         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
890         domain->geometry.aperture_end = (1UL << ias) - 1;
891         domain->geometry.force_aperture = true;
892
893         /* Initialise the context bank with our page table cfg */
894         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
895         arm_smmu_write_context_bank(smmu, cfg->cbndx);
896
897         /*
898          * Request context fault interrupt. Do this last to avoid the
899          * handler seeing a half-initialised domain state.
900          */
901         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
902         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
903                                IRQF_SHARED, "arm-smmu-context-fault", domain);
904         if (ret < 0) {
905                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
906                         cfg->irptndx, irq);
907                 cfg->irptndx = INVALID_IRPTNDX;
908         }
909
910         mutex_unlock(&smmu_domain->init_mutex);
911
912         /* Publish page table ops for map/unmap */
913         smmu_domain->pgtbl_ops = pgtbl_ops;
914         return 0;
915
916 out_clear_smmu:
917         smmu_domain->smmu = NULL;
918 out_unlock:
919         mutex_unlock(&smmu_domain->init_mutex);
920         return ret;
921 }
922
923 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
924 {
925         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
926         struct arm_smmu_device *smmu = smmu_domain->smmu;
927         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
928         int irq;
929
930         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
931                 return;
932
933         /*
934          * Disable the context bank and free the page tables before
935          * releasing the context bank index back to the allocator.
936          */
937         smmu->cbs[cfg->cbndx].cfg = NULL;
938         arm_smmu_write_context_bank(smmu, cfg->cbndx);
939
940         if (cfg->irptndx != INVALID_IRPTNDX) {
941                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
942                 devm_free_irq(smmu->dev, irq, domain);
943         }
944
945         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
946         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
947 }
948
949 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
950 {
951         struct arm_smmu_domain *smmu_domain;
952
953         if (type != IOMMU_DOMAIN_UNMANAGED &&
954             type != IOMMU_DOMAIN_DMA &&
955             type != IOMMU_DOMAIN_IDENTITY)
956                 return NULL;
957         /*
958          * Allocate the domain and initialise some of its data structures.
959          * We can't really do anything meaningful until we've added a
960          * master.
961          */
962         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
963         if (!smmu_domain)
964                 return NULL;
965
966         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
967             iommu_get_dma_cookie(&smmu_domain->domain))) {
968                 kfree(smmu_domain);
969                 return NULL;
970         }
971
972         mutex_init(&smmu_domain->init_mutex);
973         spin_lock_init(&smmu_domain->cb_lock);
974
975         return &smmu_domain->domain;
976 }
977
978 static void arm_smmu_domain_free(struct iommu_domain *domain)
979 {
980         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
981
982         /*
983          * Free the domain resources. We assume that all devices have
984          * already been detached.
985          */
986         iommu_put_dma_cookie(domain);
987         arm_smmu_destroy_domain_context(domain);
988         kfree(smmu_domain);
989 }
990
991 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
992 {
993         struct arm_smmu_smr *smr = smmu->smrs + idx;
994         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
995
996         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
997                 reg |= SMR_VALID;
998         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
999 }
1000
1001 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1002 {
1003         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1004         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1005                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1006                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1007
1008         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1009             smmu->smrs[idx].valid)
1010                 reg |= S2CR_EXIDVALID;
1011         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1012 }
1013
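/*
 * Program a complete stream map entry: the S2CR first, then the SMR (when
 * stream matching is in use) so that a valid SMR never refers to a stale S2CR.
 */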
1014 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1015 {
1016         arm_smmu_write_s2cr(smmu, idx);
1017         if (smmu->smrs)
1018                 arm_smmu_write_smr(smmu, idx);
1019 }
1020
1021 /*
1022  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1023  * should be called after sCR0 is written.
1024  */
1025 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1026 {
1027         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1028         u32 smr;
1029
1030         if (!smmu->smrs)
1031                 return;
1032
1033         /*
1034          * SMR.ID bits may not be preserved if the corresponding MASK
1035          * bits are set, so check each one separately. We can reject
1036          * masters later if they try to claim IDs outside these masks.
1037          */
1038         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1039         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1040         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1041         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1042
1043         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1044         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1045         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1046         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1047 }
1048
1049 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1050 {
1051         struct arm_smmu_smr *smrs = smmu->smrs;
1052         int i, free_idx = -ENOSPC;
1053
1054         /* Stream indexing is blissfully easy */
1055         if (!smrs)
1056                 return id;
1057
1058         /* Validating SMRs is... less so */
1059         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1060                 if (!smrs[i].valid) {
1061                         /*
1062                          * Note the first free entry we come across, which
1063                          * we'll claim in the end if nothing else matches.
1064                          */
1065                         if (free_idx < 0)
1066                                 free_idx = i;
1067                         continue;
1068                 }
1069                 /*
1070                  * If the new entry is _entirely_ matched by an existing entry,
1071                  * then reuse that, with the guarantee that there also cannot
1072                  * be any subsequent conflicting entries. In normal use we'd
1073                  * expect simply identical entries for this case, but there's
1074                  * no harm in accommodating the generalisation.
1075                  */
1076                 if ((mask & smrs[i].mask) == mask &&
1077                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1078                         return i;
1079                 /*
1080                  * If the new entry has any other overlap with an existing one,
1081                  * though, then there always exists at least one stream ID
1082                  * which would cause a conflict, and we can't allow that risk.
1083                  */
1084                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1085                         return -EINVAL;
1086         }
1087
1088         return free_idx;
1089 }
1090
1091 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1092 {
1093         if (--smmu->s2crs[idx].count)
1094                 return false;
1095
1096         smmu->s2crs[idx] = s2cr_init_val;
1097         if (smmu->smrs)
1098                 smmu->smrs[idx].valid = false;
1099
1100         return true;
1101 }
1102
1103 static int arm_smmu_master_alloc_smes(struct device *dev)
1104 {
1105         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1106         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1107         struct arm_smmu_device *smmu = cfg->smmu;
1108         struct arm_smmu_smr *smrs = smmu->smrs;
1109         struct iommu_group *group;
1110         int i, idx, ret;
1111
1112         mutex_lock(&smmu->stream_map_mutex);
1113         /* Figure out a viable stream map entry allocation */
1114         for_each_cfg_sme(fwspec, i, idx) {
1115                 u16 sid = fwspec->ids[i];
1116                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1117
1118                 if (idx != INVALID_SMENDX) {
1119                         ret = -EEXIST;
1120                         goto out_err;
1121                 }
1122
1123                 ret = arm_smmu_find_sme(smmu, sid, mask);
1124                 if (ret < 0)
1125                         goto out_err;
1126
1127                 idx = ret;
1128                 if (smrs && smmu->s2crs[idx].count == 0) {
1129                         smrs[idx].id = sid;
1130                         smrs[idx].mask = mask;
1131                         smrs[idx].valid = true;
1132                 }
1133                 smmu->s2crs[idx].count++;
1134                 cfg->smendx[i] = (s16)idx;
1135         }
1136
1137         group = iommu_group_get_for_dev(dev);
1138         if (!group)
1139                 group = ERR_PTR(-ENOMEM);
1140         if (IS_ERR(group)) {
1141                 ret = PTR_ERR(group);
1142                 goto out_err;
1143         }
1144         iommu_group_put(group);
1145
1146         /* It worked! Now, poke the actual hardware */
1147         for_each_cfg_sme(fwspec, i, idx) {
1148                 arm_smmu_write_sme(smmu, idx);
1149                 smmu->s2crs[idx].group = group;
1150         }
1151
1152         mutex_unlock(&smmu->stream_map_mutex);
1153         return 0;
1154
1155 out_err:
1156         while (i--) {
1157                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1158                 cfg->smendx[i] = INVALID_SMENDX;
1159         }
1160         mutex_unlock(&smmu->stream_map_mutex);
1161         return ret;
1162 }
1163
1164 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1165 {
1166         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1167         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1168         int i, idx;
1169
1170         mutex_lock(&smmu->stream_map_mutex);
1171         for_each_cfg_sme(fwspec, i, idx) {
1172                 if (arm_smmu_free_sme(smmu, idx))
1173                         arm_smmu_write_sme(smmu, idx);
1174                 cfg->smendx[i] = INVALID_SMENDX;
1175         }
1176         mutex_unlock(&smmu->stream_map_mutex);
1177 }
1178
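/* Point each of the master's stream map entries at this domain's context bank (or bypass) */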
1179 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1180                                       struct iommu_fwspec *fwspec)
1181 {
1182         struct arm_smmu_device *smmu = smmu_domain->smmu;
1183         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1184         u8 cbndx = smmu_domain->cfg.cbndx;
1185         enum arm_smmu_s2cr_type type;
1186         int i, idx;
1187
1188         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1189                 type = S2CR_TYPE_BYPASS;
1190         else
1191                 type = S2CR_TYPE_TRANS;
1192
1193         for_each_cfg_sme(fwspec, i, idx) {
1194                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1195                         continue;
1196
1197                 s2cr[idx].type = type;
1198                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1199                 s2cr[idx].cbndx = cbndx;
1200                 arm_smmu_write_s2cr(smmu, idx);
1201         }
1202         return 0;
1203 }
1204
1205 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1206 {
1207         int ret;
1208         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1209         struct arm_smmu_device *smmu;
1210         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1211
1212         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1213                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1214                 return -ENXIO;
1215         }
1216
1217         /*
1218          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1219          * domains between of_xlate() and add_device() - we have no way to cope
1220          * with that, so until ARM gets converted to rely on groups and default
1221          * domains, just say no (but more politely than by dereferencing NULL).
1222          * This should be at least a WARN_ON once that's sorted.
1223          */
1224         if (!fwspec->iommu_priv)
1225                 return -ENODEV;
1226
1227         smmu = fwspec_smmu(fwspec);
1228         /* Ensure that the domain is finalised */
1229         ret = arm_smmu_init_domain_context(domain, smmu);
1230         if (ret < 0)
1231                 return ret;
1232
1233         /*
1234          * Sanity check the domain. We don't support domains across
1235          * different SMMUs.
1236          */
1237         if (smmu_domain->smmu != smmu) {
1238                 dev_err(dev,
1239                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1240                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1241                 return -EINVAL;
1242         }
1243
1244         /* Looks ok, so add the device to the domain */
1245         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1246 }
1247
1248 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1249                         phys_addr_t paddr, size_t size, int prot)
1250 {
1251         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1252
1253         if (!ops)
1254                 return -ENODEV;
1255
1256         return ops->map(ops, iova, paddr, size, prot);
1257 }
1258
1259 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1260                              size_t size)
1261 {
1262         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1263
1264         if (!ops)
1265                 return 0;
1266
1267         return ops->unmap(ops, iova, size);
1268 }
1269
1270 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1271 {
1272         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1273
1274         if (smmu_domain->tlb_ops)
1275                 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1276 }
1277
1278 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1279 {
1280         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1281
1282         if (smmu_domain->tlb_ops)
1283                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1284 }
1285
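/*
 * Resolve an IOVA via the hardware ATS1PR address translation operation,
 * falling back to a software page table walk if the ATSR poll times out.
 */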
1286 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1287                                               dma_addr_t iova)
1288 {
1289         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1290         struct arm_smmu_device *smmu = smmu_domain->smmu;
1291         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1292         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1293         struct device *dev = smmu->dev;
1294         void __iomem *cb_base;
1295         u32 tmp;
1296         u64 phys;
1297         unsigned long va, flags;
1298
1299         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1300
1301         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1302         /* ATS1 registers can only be written atomically */
1303         va = iova & ~0xfffUL;
1304         if (smmu->version == ARM_SMMU_V2)
1305                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1306         else /* Register is only 32-bit in v1 */
1307                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1308
1309         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1310                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1311                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1312                 dev_err(dev,
1313                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1314                         &iova);
1315                 return ops->iova_to_phys(ops, iova);
1316         }
1317
1318         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1319         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1320         if (phys & CB_PAR_F) {
1321                 dev_err(dev, "translation fault!\n");
1322                 dev_err(dev, "PAR = 0x%llx\n", phys);
1323                 return 0;
1324         }
1325
1326         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1327 }
1328
1329 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1330                                         dma_addr_t iova)
1331 {
1332         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1333         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1334
1335         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1336                 return iova;
1337
1338         if (!ops)
1339                 return 0;
1340
1341         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1342                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1343                 return arm_smmu_iova_to_phys_hard(domain, iova);
1344
1345         return ops->iova_to_phys(ops, iova);
1346 }
1347
1348 static bool arm_smmu_capable(enum iommu_cap cap)
1349 {
1350         switch (cap) {
1351         case IOMMU_CAP_CACHE_COHERENCY:
1352                 /*
1353                  * Return true here as the SMMU can always send out coherent
1354                  * requests.
1355                  */
1356                 return true;
1357         case IOMMU_CAP_NOEXEC:
1358                 return true;
1359         default:
1360                 return false;
1361         }
1362 }
1363
1364 static int arm_smmu_match_node(struct device *dev, void *data)
1365 {
1366         return dev->fwnode == data;
1367 }
1368
1369 static
1370 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1371 {
1372         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1373                                                 fwnode, arm_smmu_match_node);
1374         put_device(dev);
1375         return dev ? dev_get_drvdata(dev) : NULL;
1376 }
1377
1378 static int arm_smmu_add_device(struct device *dev)
1379 {
1380         struct arm_smmu_device *smmu;
1381         struct arm_smmu_master_cfg *cfg;
1382         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1383         int i, ret;
1384
1385         if (using_legacy_binding) {
1386                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1387
1388                 /*
1389                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1390                  * will allocate/initialise a new one. Thus we need to update fwspec for
1391                  * later use.
1392                  */
1393                 fwspec = dev->iommu_fwspec;
1394                 if (ret)
1395                         goto out_free;
1396         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1397                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1398         } else {
1399                 return -ENODEV;
1400         }
1401
1402         ret = -EINVAL;
1403         for (i = 0; i < fwspec->num_ids; i++) {
1404                 u16 sid = fwspec->ids[i];
1405                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1406
1407                 if (sid & ~smmu->streamid_mask) {
1408                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1409                                 sid, smmu->streamid_mask);
1410                         goto out_free;
1411                 }
1412                 if (mask & ~smmu->smr_mask_mask) {
1413                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1414                                 mask, smmu->smr_mask_mask);
1415                         goto out_free;
1416                 }
1417         }
1418
1419         ret = -ENOMEM;
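        /* i == fwspec->num_ids here, so smendx[] gets one slot per stream ID */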
1420         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1421                       GFP_KERNEL);
1422         if (!cfg)
1423                 goto out_free;
1424
1425         cfg->smmu = smmu;
1426         fwspec->iommu_priv = cfg;
1427         while (i--)
1428                 cfg->smendx[i] = INVALID_SMENDX;
1429
1430         ret = arm_smmu_master_alloc_smes(dev);
1431         if (ret)
1432                 goto out_cfg_free;
1433
1434         iommu_device_link(&smmu->iommu, dev);
1435
1436         return 0;
1437
1438 out_cfg_free:
1439         kfree(cfg);
1440 out_free:
1441         iommu_fwspec_free(dev);
1442         return ret;
1443 }
1444
1445 static void arm_smmu_remove_device(struct device *dev)
1446 {
1447         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1448         struct arm_smmu_master_cfg *cfg;
1449         struct arm_smmu_device *smmu;
1450
1452         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1453                 return;
1454
1455         cfg  = fwspec->iommu_priv;
1456         smmu = cfg->smmu;
1457
1458         iommu_device_unlink(&smmu->iommu, dev);
1459         arm_smmu_master_free_smes(fwspec);
1460         iommu_group_remove_device(dev);
1461         kfree(fwspec->iommu_priv);
1462         iommu_fwspec_free(dev);
1463 }
1464
1465 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1466 {
1467         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1468         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1469         struct iommu_group *group = NULL;
1470         int i, idx;
1471
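             /*
              * Devices aliasing the same stream mapping entries must share an
              * IOMMU group; conflicting pre-existing group assignments are an
              * error.
              */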
1472         for_each_cfg_sme(fwspec, i, idx) {
1473                 if (group && smmu->s2crs[idx].group &&
1474                     group != smmu->s2crs[idx].group)
1475                         return ERR_PTR(-EINVAL);
1476
1477                 group = smmu->s2crs[idx].group;
1478         }
1479
1480         if (group)
1481                 return iommu_group_ref_get(group);
1482
1483         if (dev_is_pci(dev))
1484                 group = pci_device_group(dev);
1485         else
1486                 group = generic_device_group(dev);
1487
1488         return group;
1489 }
1490
1491 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1492                                     enum iommu_attr attr, void *data)
1493 {
1494         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1495
1496         switch (domain->type) {
1497         case IOMMU_DOMAIN_UNMANAGED:
1498                 switch (attr) {
1499                 case DOMAIN_ATTR_NESTING:
1500                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1501                         return 0;
1502                 default:
1503                         return -ENODEV;
1504                 }
1505                 break;
1506         case IOMMU_DOMAIN_DMA:
1507                 switch (attr) {
1508                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1509                         *(int *)data = smmu_domain->non_strict;
1510                         return 0;
1511                 default:
1512                         return -ENODEV;
1513                 }
1514                 break;
1515         default:
1516                 return -EINVAL;
1517         }
1518 }
1519
1520 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1521                                     enum iommu_attr attr, void *data)
1522 {
1523         int ret = 0;
1524         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1525
1526         mutex_lock(&smmu_domain->init_mutex);
1527
1528         switch (domain->type) {
1529         case IOMMU_DOMAIN_UNMANAGED:
1530                 switch (attr) {
1531                 case DOMAIN_ATTR_NESTING:
1532                         if (smmu_domain->smmu) {
1533                                 ret = -EPERM;
1534                                 goto out_unlock;
1535                         }
1536
1537                         if (*(int *)data)
1538                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1539                         else
1540                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1541                         break;
1542                 default:
1543                         ret = -ENODEV;
1544                 }
1545                 break;
1546         case IOMMU_DOMAIN_DMA:
1547                 switch (attr) {
1548                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1549                         smmu_domain->non_strict = *(int *)data;
1550                         break;
1551                 default:
1552                         ret = -ENODEV;
1553                 }
1554                 break;
1555         default:
1556                 ret = -EINVAL;
1557         }
1558 out_unlock:
1559         mutex_unlock(&smmu_domain->init_mutex);
1560         return ret;
1561 }
1562
1563 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1564 {
1565         u32 mask, fwid = 0;
1566
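             /*
              * Pack the 16-bit stream ID into the low half of fwid and any
              * SMR mask (second cell, or the "stream-match-mask" property)
              * above it via SMR_MASK_SHIFT.
              */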
1567         if (args->args_count > 0)
1568                 fwid |= (u16)args->args[0];
1569
1570         if (args->args_count > 1)
1571                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1572         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1573                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1574
1575         return iommu_fwspec_add_ids(dev, &fwid, 1);
1576 }
1577
1578 static void arm_smmu_get_resv_regions(struct device *dev,
1579                                       struct list_head *head)
1580 {
1581         struct iommu_resv_region *region;
1582         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1583
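             /*
              * Reserve an IOVA window for software-managed MSIs so that the
              * IOMMU DMA layer has somewhere to map MSI doorbells.
              */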
1584         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1585                                          prot, IOMMU_RESV_SW_MSI);
1586         if (!region)
1587                 return;
1588
1589         list_add_tail(&region->list, head);
1590
1591         iommu_dma_get_resv_regions(dev, head);
1592 }
1593
1594 static void arm_smmu_put_resv_regions(struct device *dev,
1595                                       struct list_head *head)
1596 {
1597         struct iommu_resv_region *entry, *next;
1598
1599         list_for_each_entry_safe(entry, next, head, list)
1600                 kfree(entry);
1601 }
1602
1603 static struct iommu_ops arm_smmu_ops = {
1604         .capable                = arm_smmu_capable,
1605         .domain_alloc           = arm_smmu_domain_alloc,
1606         .domain_free            = arm_smmu_domain_free,
1607         .attach_dev             = arm_smmu_attach_dev,
1608         .map                    = arm_smmu_map,
1609         .unmap                  = arm_smmu_unmap,
1610         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
1611         .iotlb_sync             = arm_smmu_iotlb_sync,
1612         .iova_to_phys           = arm_smmu_iova_to_phys,
1613         .add_device             = arm_smmu_add_device,
1614         .remove_device          = arm_smmu_remove_device,
1615         .device_group           = arm_smmu_device_group,
1616         .domain_get_attr        = arm_smmu_domain_get_attr,
1617         .domain_set_attr        = arm_smmu_domain_set_attr,
1618         .of_xlate               = arm_smmu_of_xlate,
1619         .get_resv_regions       = arm_smmu_get_resv_regions,
1620         .put_resv_regions       = arm_smmu_put_resv_regions,
1621         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1622 };
1623
1624 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1625 {
1626         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1627         int i;
1628         u32 reg, major;
1629
1630         /* clear global FSR */
1631         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1632         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1633
1634         /*
1635          * Reset stream mapping groups: Initial values mark all SMRn as
1636          * invalid and all S2CRn as bypass unless overridden.
1637          */
1638         for (i = 0; i < smmu->num_mapping_groups; ++i)
1639                 arm_smmu_write_sme(smmu, i);
1640
1641         if (smmu->model == ARM_MMU500) {
1642                 /*
1643                  * The CACHE_LOCK bit of ACR must be cleared before
1644                  * ARM_MMU500_ACTLR_CPRE can be cleared below; note that
1645                  * CACHE_LOCK is only present from MMU-500 r2 onwards.
1646                  */
1647                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1648                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1649                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1650                 if (major >= 2)
1651                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1652                 /*
1653                  * Allow unmatched Stream IDs to allocate bypass
1654                  * TLB entries for reduced latency.
1655                  */
1656                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1657                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1658         }
1659
1660         /* Make sure all context banks are disabled and clear CB_FSR */
1661         for (i = 0; i < smmu->num_context_banks; ++i) {
1662                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1663
1664                 arm_smmu_write_context_bank(smmu, i);
1665                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1666                 /*
1667                  * Disable MMU-500's not-particularly-beneficial next-page
1668                  * prefetcher for the sake of errata #841119 and #826419.
1669                  */
1670                 if (smmu->model == ARM_MMU500) {
1671                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1672                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1673                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1674                 }
1675         }
1676
1677         /* Invalidate the TLB, just in case */
1678         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1679         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1680
1681         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1682
1683         /* Enable fault reporting */
1684         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1685
1686         /* Disable TLB broadcasting. */
1687         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1688
1689         /* Enable client access, handling unmatched streams as appropriate */
1690         reg &= ~sCR0_CLIENTPD;
1691         if (disable_bypass)
1692                 reg |= sCR0_USFCFG;
1693         else
1694                 reg &= ~sCR0_USFCFG;
1695
1696         /* Disable forced broadcasting */
1697         reg &= ~sCR0_FB;
1698
1699         /* Don't upgrade barriers */
1700         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1701
1702         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1703                 reg |= sCR0_VMID16EN;
1704
1705         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1706                 reg |= sCR0_EXIDENABLE;
1707
1708         /* Sync outstanding TLB maintenance, then enable the new configuration */
1709         arm_smmu_tlb_sync_global(smmu);
1710         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1711 }
1712
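     /*
      * Decode the address size fields of the ID registers (IAS, OAS, UBS)
      * into an address width in bits.
      */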
1713 static int arm_smmu_id_size_to_bits(int size)
1714 {
1715         switch (size) {
1716         case 0:
1717                 return 32;
1718         case 1:
1719                 return 36;
1720         case 2:
1721                 return 40;
1722         case 3:
1723                 return 42;
1724         case 4:
1725                 return 44;
1726         case 5:
1727         default:
1728                 return 48;
1729         }
1730 }
1731
1732 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1733 {
1734         unsigned long size;
1735         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1736         u32 id;
1737         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1738         int i;
1739
1740         dev_notice(smmu->dev, "probing hardware configuration...\n");
1741         dev_notice(smmu->dev, "SMMUv%d with:\n",
1742                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1743
1744         /* ID0 */
1745         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1746
1747         /* Restrict available stages based on module parameter */
1748         if (force_stage == 1)
1749                 id &= ~(ID0_S2TS | ID0_NTS);
1750         else if (force_stage == 2)
1751                 id &= ~(ID0_S1TS | ID0_NTS);
1752
1753         if (id & ID0_S1TS) {
1754                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1755                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1756         }
1757
1758         if (id & ID0_S2TS) {
1759                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1760                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1761         }
1762
1763         if (id & ID0_NTS) {
1764                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1765                 dev_notice(smmu->dev, "\tnested translation\n");
1766         }
1767
1768         if (!(smmu->features &
1769                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1770                 dev_err(smmu->dev, "\tno translation support!\n");
1771                 return -ENODEV;
1772         }
1773
1774         if ((id & ID0_S1TS) &&
1775                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1776                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1777                 dev_notice(smmu->dev, "\taddress translation ops\n");
1778         }
1779
1780         /*
1781          * In order for DMA API calls to work properly, we must defer to what
1782          * the FW says about coherency, regardless of what the hardware claims.
1783          * Fortunately, this also opens up a workaround for systems where the
1784          * ID register value has ended up configured incorrectly.
1785          */
1786         cttw_reg = !!(id & ID0_CTTW);
1787         if (cttw_fw || cttw_reg)
1788                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1789                            cttw_fw ? "" : "non-");
1790         if (cttw_fw != cttw_reg)
1791                 dev_notice(smmu->dev,
1792                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1793
1794         /* Max. number of entries we have for stream matching/indexing */
1795         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1796                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1797                 size = 1 << 16;
1798         } else {
1799                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1800         }
1801         smmu->streamid_mask = size - 1;
1802         if (id & ID0_SMS) {
1803                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1804                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1805                 if (size == 0) {
1806                         dev_err(smmu->dev,
1807                                 "stream-matching supported, but no SMRs present!\n");
1808                         return -ENODEV;
1809                 }
1810
1811                 /* Zero-initialised to mark as invalid */
1812                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1813                                           GFP_KERNEL);
1814                 if (!smmu->smrs)
1815                         return -ENOMEM;
1816
1817                 dev_notice(smmu->dev,
1818                            "\tstream matching with %lu register groups\n", size);
1819         }
1820         /* s2cr->type == 0 means translation, so initialise explicitly */
1821         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1822                                          GFP_KERNEL);
1823         if (!smmu->s2crs)
1824                 return -ENOMEM;
1825         for (i = 0; i < size; i++)
1826                 smmu->s2crs[i] = s2cr_init_val;
1827
1828         smmu->num_mapping_groups = size;
1829         mutex_init(&smmu->stream_map_mutex);
1830         spin_lock_init(&smmu->global_sync_lock);
1831
1832         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1833                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1834                 if (!(id & ID0_PTFS_NO_AARCH32S))
1835                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1836         }
1837
1838         /* ID1 */
1839         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1840         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1841
1842         /* Check for size mismatch of SMMU address space from mapped region */
1843         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1844         size <<= smmu->pgshift;
1845         if (smmu->cb_base != gr0_base + size)
1846                 dev_warn(smmu->dev,
1847                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1848                         size * 2, (smmu->cb_base - gr0_base) * 2);
1849
1850         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1851         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1852         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1853                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1854                 return -ENODEV;
1855         }
1856         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1857                    smmu->num_context_banks, smmu->num_s2_context_banks);
1858         /*
1859          * Cavium CN88xx erratum #27704.
1860          * Ensure ASID and VMID allocation is unique across all SMMUs in
1861          * the system.
1862          */
1863         if (smmu->model == CAVIUM_SMMUV2) {
1864                 smmu->cavium_id_base =
1865                         atomic_add_return(smmu->num_context_banks,
1866                                           &cavium_smmu_context_count);
1867                 smmu->cavium_id_base -= smmu->num_context_banks;
1868                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1869         }
1870         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1871                                  sizeof(*smmu->cbs), GFP_KERNEL);
1872         if (!smmu->cbs)
1873                 return -ENOMEM;
1874
1875         /* ID2 */
1876         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1877         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1878         smmu->ipa_size = size;
1879
1880         /* The output mask is also applied for bypass */
1881         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1882         smmu->pa_size = size;
1883
1884         if (id & ID2_VMID16)
1885                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1886
1887         /*
1888          * What the page table walker can address actually depends on which
1889          * descriptor format is in use, but since a) we don't know that yet,
1890          * and b) it can vary per context bank, this will have to do...
1891          */
1892         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1893                 dev_warn(smmu->dev,
1894                          "failed to set DMA mask for table walker\n");
1895
1896         if (smmu->version < ARM_SMMU_V2) {
1897                 smmu->va_size = smmu->ipa_size;
1898                 if (smmu->version == ARM_SMMU_V1_64K)
1899                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1900         } else {
1901                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1902                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1903                 if (id & ID2_PTFS_4K)
1904                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1905                 if (id & ID2_PTFS_16K)
1906                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1907                 if (id & ID2_PTFS_64K)
1908                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1909         }
1910
1911         /* Now that we know the supported formats, derive the allowed page sizes */
1912         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1913                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1914         if (smmu->features &
1915             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1916                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1917         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1918                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1919         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1920                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1921
1922         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1923                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1924         else
1925                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1926         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1927                    smmu->pgsize_bitmap);
1928
1930         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1931                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1932                            smmu->va_size, smmu->ipa_size);
1933
1934         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1935                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1936                            smmu->ipa_size, smmu->pa_size);
1937
1938         return 0;
1939 }
1940
1941 struct arm_smmu_match_data {
1942         enum arm_smmu_arch_version version;
1943         enum arm_smmu_implementation model;
1944 };
1945
1946 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1947 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1948
1949 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1950 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1951 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1952 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1953 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1954
1955 static const struct of_device_id arm_smmu_of_match[] = {
1956         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1957         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1958         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1959         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1960         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1961         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1962         { },
1963 };
1964 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1965
1966 #ifdef CONFIG_ACPI
1967 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1968 {
1969         int ret = 0;
1970
1971         switch (model) {
1972         case ACPI_IORT_SMMU_V1:
1973         case ACPI_IORT_SMMU_CORELINK_MMU400:
1974                 smmu->version = ARM_SMMU_V1;
1975                 smmu->model = GENERIC_SMMU;
1976                 break;
1977         case ACPI_IORT_SMMU_CORELINK_MMU401:
1978                 smmu->version = ARM_SMMU_V1_64K;
1979                 smmu->model = GENERIC_SMMU;
1980                 break;
1981         case ACPI_IORT_SMMU_V2:
1982                 smmu->version = ARM_SMMU_V2;
1983                 smmu->model = GENERIC_SMMU;
1984                 break;
1985         case ACPI_IORT_SMMU_CORELINK_MMU500:
1986                 smmu->version = ARM_SMMU_V2;
1987                 smmu->model = ARM_MMU500;
1988                 break;
1989         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1990                 smmu->version = ARM_SMMU_V2;
1991                 smmu->model = CAVIUM_SMMUV2;
1992                 break;
1993         default:
1994                 ret = -ENODEV;
1995         }
1996
1997         return ret;
1998 }
1999
2000 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2001                                       struct arm_smmu_device *smmu)
2002 {
2003         struct device *dev = smmu->dev;
2004         struct acpi_iort_node *node =
2005                 *(struct acpi_iort_node **)dev_get_platdata(dev);
2006         struct acpi_iort_smmu *iort_smmu;
2007         int ret;
2008
2009         /* Retrieve SMMU1/2 specific data */
2010         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2011
2012         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2013         if (ret < 0)
2014                 return ret;
2015
2016         /* Ignore the configuration access interrupt */
2017         smmu->num_global_irqs = 1;
2018
2019         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2020                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2021
2022         return 0;
2023 }
2024 #else
2025 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2026                                              struct arm_smmu_device *smmu)
2027 {
2028         return -ENODEV;
2029 }
2030 #endif
2031
2032 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2033                                     struct arm_smmu_device *smmu)
2034 {
2035         const struct arm_smmu_match_data *data;
2036         struct device *dev = &pdev->dev;
2037         bool legacy_binding;
2038
2039         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2040                                  &smmu->num_global_irqs)) {
2041                 dev_err(dev, "missing #global-interrupts property\n");
2042                 return -ENODEV;
2043         }
2044
2045         data = of_device_get_match_data(dev);
2046         smmu->version = data->version;
2047         smmu->model = data->model;
2048
2049         parse_driver_options(smmu);
2050
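             /*
              * Mixing the legacy "mmu-masters" binding with the generic
              * binding across SMMU instances is not supported, so refuse to
              * probe if both are seen.
              */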
2051         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2052         if (legacy_binding && !using_generic_binding) {
2053                 if (!using_legacy_binding)
2054                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2055                 using_legacy_binding = true;
2056         } else if (!legacy_binding && !using_legacy_binding) {
2057                 using_generic_binding = true;
2058         } else {
2059                 dev_err(dev, "not probing due to mismatched DT properties\n");
2060                 return -ENODEV;
2061         }
2062
2063         if (of_dma_is_coherent(dev->of_node))
2064                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2065
2066         return 0;
2067 }
2068
2069 static void arm_smmu_bus_init(void)
2070 {
2071         /* Oh, for a proper bus abstraction */
2072         if (!iommu_present(&platform_bus_type))
2073                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2074 #ifdef CONFIG_ARM_AMBA
2075         if (!iommu_present(&amba_bustype))
2076                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2077 #endif
2078 #ifdef CONFIG_PCI
2079         if (!iommu_present(&pci_bus_type)) {
2080                 pci_request_acs();
2081                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2082         }
2083 #endif
2084 }
2085
2086 static int arm_smmu_device_probe(struct platform_device *pdev)
2087 {
2088         struct resource *res;
2089         resource_size_t ioaddr;
2090         struct arm_smmu_device *smmu;
2091         struct device *dev = &pdev->dev;
2092         int num_irqs, i, err;
2093
2094         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2095         if (!smmu) {
2096                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2097                 return -ENOMEM;
2098         }
2099         smmu->dev = dev;
2100
2101         if (dev->of_node)
2102                 err = arm_smmu_device_dt_probe(pdev, smmu);
2103         else
2104                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2105
2106         if (err)
2107                 return err;
2108
2109         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2110         smmu->base = devm_ioremap_resource(dev, res);
2111         if (IS_ERR(smmu->base))
2112                 return PTR_ERR(smmu->base);
2113         ioaddr = res->start;
2114         smmu->cb_base = smmu->base + resource_size(res) / 2;
2115
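             /*
              * Count every interrupt resource; anything beyond the global
              * fault IRQs is treated as a context bank IRQ.
              */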
2116         num_irqs = 0;
2117         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2118                 num_irqs++;
2119                 if (num_irqs > smmu->num_global_irqs)
2120                         smmu->num_context_irqs++;
2121         }
2122
2123         if (!smmu->num_context_irqs) {
2124                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2125                         num_irqs, smmu->num_global_irqs + 1);
2126                 return -ENODEV;
2127         }
2128
2129         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2130                                   GFP_KERNEL);
2131         if (!smmu->irqs) {
2132                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2133                 return -ENOMEM;
2134         }
2135
2136         for (i = 0; i < num_irqs; ++i) {
2137                 int irq = platform_get_irq(pdev, i);
2138
2139                 if (irq < 0) {
2140                         dev_err(dev, "failed to get irq index %d\n", i);
2141                         return -ENODEV;
2142                 }
2143                 smmu->irqs[i] = irq;
2144         }
2145
2146         err = arm_smmu_device_cfg_probe(smmu);
2147         if (err)
2148                 return err;
2149
2150         if (smmu->version == ARM_SMMU_V2) {
2151                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2152                         dev_err(dev,
2153                               "found only %d context irq(s) but %d required\n",
2154                               smmu->num_context_irqs, smmu->num_context_banks);
2155                         return -ENODEV;
2156                 }
2157
2158                 /* Ignore superfluous interrupts */
2159                 smmu->num_context_irqs = smmu->num_context_banks;
2160         }
2161
2162         for (i = 0; i < smmu->num_global_irqs; ++i) {
2163                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2164                                        arm_smmu_global_fault,
2165                                        IRQF_SHARED,
2166                                        "arm-smmu global fault",
2167                                        smmu);
2168                 if (err) {
2169                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2170                                 i, smmu->irqs[i]);
2171                         return err;
2172                 }
2173         }
2174
2175         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2176                                      "smmu.%pa", &ioaddr);
2177         if (err) {
2178                 dev_err(dev, "Failed to register iommu in sysfs\n");
2179                 return err;
2180         }
2181
2182         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2183         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2184
2185         err = iommu_device_register(&smmu->iommu);
2186         if (err) {
2187                 dev_err(dev, "Failed to register iommu\n");
2188                 return err;
2189         }
2190
2191         platform_set_drvdata(pdev, smmu);
2192         arm_smmu_device_reset(smmu);
2193         arm_smmu_test_smr_masks(smmu);
2194
2195         /*
2196          * For ACPI and generic DT bindings, an SMMU will be probed before
2197          * any device which might need it, so we want the bus ops in place
2198          * ready to handle default domain setup as soon as any SMMU exists.
2199          */
2200         if (!using_legacy_binding)
2201                 arm_smmu_bus_init();
2202
2203         return 0;
2204 }
2205
2206 /*
2207  * With the legacy DT binding in play, though, we have no guarantees about
2208  * probe order, but then we're also not doing default domains, so we can
2209  * delay setting bus ops until we're sure every possible SMMU is ready,
2210  * and that way ensure that no add_device() calls get missed.
2211  */
2212 static int arm_smmu_legacy_bus_init(void)
2213 {
2214         if (using_legacy_binding)
2215                 arm_smmu_bus_init();
2216         return 0;
2217 }
2218 device_initcall_sync(arm_smmu_legacy_bus_init);
2219
2220 static int arm_smmu_device_remove(struct platform_device *pdev)
2221 {
2222         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2223
2224         if (!smmu)
2225                 return -ENODEV;
2226
2227         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2228                 dev_err(&pdev->dev, "removing device with active domains!\n");
2229
2230         /* Turn the thing off */
2231         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2232         return 0;
2233 }
2234
2235 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2236 {
2237         arm_smmu_device_remove(pdev);
2238 }
2239
2240 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2241 {
2242         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2243
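             /*
              * Stream mapping and context bank state is cached in software,
              * so a full device reset is enough to restore the hardware
              * configuration on resume.
              */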
2244         arm_smmu_device_reset(smmu);
2245         return 0;
2246 }
2247
2248 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2249
2250 static struct platform_driver arm_smmu_driver = {
2251         .driver = {
2252                 .name           = "arm-smmu",
2253                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2254                 .pm             = &arm_smmu_pm_ops,
2255         },
2256         .probe  = arm_smmu_device_probe,
2257         .remove = arm_smmu_device_remove,
2258         .shutdown = arm_smmu_device_shutdown,
2259 };
2260 module_platform_driver(arm_smmu_driver);
2261
2262 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2263 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2264 MODULE_LICENSE("GPL v2");