linux.git: drivers/iommu/arm-smmu.c (blob at "iommu/arm-smmu: Add support for the fsl-mc bus")
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55 #include <linux/fsl/mc.h>
56
57 #include "io-pgtable.h"
58 #include "arm-smmu-regs.h"
59
60 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
61
62 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
63 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
64 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
65
66 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
67 #define TLB_SPIN_COUNT                  10
68
69 /* Maximum number of context banks per SMMU */
70 #define ARM_SMMU_MAX_CBS                128
71
72 /* SMMU global address space */
73 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
74 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
75
76 /*
77  * SMMU global address space with conditional offset to access secure
78  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
79  * nsGFSYNR0: 0x450)
80  */
81 #define ARM_SMMU_GR0_NS(smmu)                                           \
82         ((smmu)->base +                                                 \
83                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
84                         ? 0x400 : 0))
85
86 /*
87  * Some 64-bit registers only make sense to write atomically, but in such
88  * cases all the data relevant to AArch32 formats lies within the lower word,
89  * therefore this actually makes more sense than it might first appear.
90  */
91 #ifdef CONFIG_64BIT
92 #define smmu_write_atomic_lq            writeq_relaxed
93 #else
94 #define smmu_write_atomic_lq            writel_relaxed
95 #endif
96
97 /* Translation context bank */
98 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
99
100 #define MSI_IOVA_BASE                   0x8000000
101 #define MSI_IOVA_LENGTH                 0x100000
102
103 static int force_stage;
104 module_param(force_stage, int, S_IRUGO);
105 MODULE_PARM_DESC(force_stage,
106         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
107 static bool disable_bypass;
108 module_param(disable_bypass, bool, S_IRUGO);
109 MODULE_PARM_DESC(disable_bypass,
110         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
111
112 enum arm_smmu_arch_version {
113         ARM_SMMU_V1,
114         ARM_SMMU_V1_64K,
115         ARM_SMMU_V2,
116 };
117
118 enum arm_smmu_implementation {
119         GENERIC_SMMU,
120         ARM_MMU500,
121         CAVIUM_SMMUV2,
122 };
123
124 struct arm_smmu_s2cr {
125         struct iommu_group              *group;
126         int                             count;
127         enum arm_smmu_s2cr_type         type;
128         enum arm_smmu_s2cr_privcfg      privcfg;
129         u8                              cbndx;
130 };
131
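/*
 * Reset value for a stream-to-context register entry: fault unmatched or
 * unused streams if "disable_bypass" is set, otherwise let them bypass
 * translation.
 */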
132 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
133         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
134 }
135
136 struct arm_smmu_smr {
137         u16                             mask;
138         u16                             id;
139         bool                            valid;
140 };
141
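/*
 * Software shadow of a context bank's translation registers, staged by
 * arm_smmu_init_context_bank() and committed to hardware by
 * arm_smmu_write_context_bank().
 */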
142 struct arm_smmu_cb {
143         u64                             ttbr[2];
144         u32                             tcr[2];
145         u32                             mair[2];
146         struct arm_smmu_cfg             *cfg;
147 };
148
149 struct arm_smmu_master_cfg {
150         struct arm_smmu_device          *smmu;
151         s16                             smendx[];
152 };
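/*
 * Per-master data hangs off dev->iommu_fwspec->iommu_priv; these helpers
 * retrieve it, and for_each_cfg_sme() walks a master's stream IDs (index i)
 * together with the stream map entry (SMR/S2CR pair) allocated for each one
 * (index idx, or INVALID_SMENDX if none has been allocated yet).
 */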
153 #define INVALID_SMENDX                  -1
154 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
155 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
156 #define fwspec_smendx(fw, i) \
157         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
158 #define for_each_cfg_sme(fw, i, idx) \
159         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
160
161 struct arm_smmu_device {
162         struct device                   *dev;
163
164         void __iomem                    *base;
165         void __iomem                    *cb_base;
166         unsigned long                   pgshift;
167
168 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
169 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
170 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
171 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
172 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
173 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
174 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
177 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
179 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
180 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
181         u32                             features;
182
183 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
184         u32                             options;
185         enum arm_smmu_arch_version      version;
186         enum arm_smmu_implementation    model;
187
188         u32                             num_context_banks;
189         u32                             num_s2_context_banks;
190         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
191         struct arm_smmu_cb              *cbs;
192         atomic_t                        irptndx;
193
194         u32                             num_mapping_groups;
195         u16                             streamid_mask;
196         u16                             smr_mask_mask;
197         struct arm_smmu_smr             *smrs;
198         struct arm_smmu_s2cr            *s2crs;
199         struct mutex                    stream_map_mutex;
200
201         unsigned long                   va_size;
202         unsigned long                   ipa_size;
203         unsigned long                   pa_size;
204         unsigned long                   pgsize_bitmap;
205
206         u32                             num_global_irqs;
207         u32                             num_context_irqs;
208         unsigned int                    *irqs;
209
210         u32                             cavium_id_base; /* Specific to Cavium */
211
212         spinlock_t                      global_sync_lock;
213
214         /* IOMMU core code handle */
215         struct iommu_device             iommu;
216 };
217
218 enum arm_smmu_context_fmt {
219         ARM_SMMU_CTX_FMT_NONE,
220         ARM_SMMU_CTX_FMT_AARCH64,
221         ARM_SMMU_CTX_FMT_AARCH32_L,
222         ARM_SMMU_CTX_FMT_AARCH32_S,
223 };
224
225 struct arm_smmu_cfg {
226         u8                              cbndx;
227         u8                              irptndx;
228         union {
229                 u16                     asid;
230                 u16                     vmid;
231         };
232         u32                             cbar;
233         enum arm_smmu_context_fmt       fmt;
234 };
235 #define INVALID_IRPTNDX                 0xff
236
237 enum arm_smmu_domain_stage {
238         ARM_SMMU_DOMAIN_S1 = 0,
239         ARM_SMMU_DOMAIN_S2,
240         ARM_SMMU_DOMAIN_NESTED,
241         ARM_SMMU_DOMAIN_BYPASS,
242 };
243
244 struct arm_smmu_domain {
245         struct arm_smmu_device          *smmu;
246         struct io_pgtable_ops           *pgtbl_ops;
247         const struct iommu_gather_ops   *tlb_ops;
248         struct arm_smmu_cfg             cfg;
249         enum arm_smmu_domain_stage      stage;
250         struct mutex                    init_mutex; /* Protects smmu pointer */
251         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
252         struct iommu_domain             domain;
253 };
254
255 struct arm_smmu_option_prop {
256         u32 opt;
257         const char *prop;
258 };
259
260 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
261
262 static bool using_legacy_binding, using_generic_binding;
263
264 static struct arm_smmu_option_prop arm_smmu_options[] = {
265         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
266         { 0, NULL},
267 };
268
269 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
270 {
271         return container_of(dom, struct arm_smmu_domain, domain);
272 }
273
274 static void parse_driver_options(struct arm_smmu_device *smmu)
275 {
276         int i = 0;
277
278         do {
279                 if (of_property_read_bool(smmu->dev->of_node,
280                                                 arm_smmu_options[i].prop)) {
281                         smmu->options |= arm_smmu_options[i].opt;
282                         dev_notice(smmu->dev, "option %s\n",
283                                 arm_smmu_options[i].prop);
284                 }
285         } while (arm_smmu_options[++i].opt);
286 }
287
288 static struct device_node *dev_get_dev_node(struct device *dev)
289 {
290         if (dev_is_pci(dev)) {
291                 struct pci_bus *bus = to_pci_dev(dev)->bus;
292
293                 while (!pci_is_root_bus(bus))
294                         bus = bus->parent;
295                 return of_node_get(bus->bridge->parent->of_node);
296         }
297
298         return of_node_get(dev->of_node);
299 }
300
301 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
302 {
303         *((__be32 *)data) = cpu_to_be32(alias);
304         return 0; /* Continue walking */
305 }
306
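/*
 * driver_for_each_device() callback: returns 1 to stop the walk once an SMMU
 * whose legacy "mmu-masters" property references the master's device node is
 * found, leaving the phandle iterator at the matching entry and passing the
 * SMMU's struct device back through *data.
 */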
307 static int __find_legacy_master_phandle(struct device *dev, void *data)
308 {
309         struct of_phandle_iterator *it = *(void **)data;
310         struct device_node *np = it->node;
311         int err;
312
313         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
314                             "#stream-id-cells", 0)
315                 if (it->node == np) {
316                         *(void **)data = dev;
317                         return 1;
318                 }
319         it->node = np;
320         return err == -ENOENT ? 0 : err;
321 }
322
323 static struct platform_driver arm_smmu_driver;
324 static struct iommu_ops arm_smmu_ops;
325
326 static int arm_smmu_register_legacy_master(struct device *dev,
327                                            struct arm_smmu_device **smmu)
328 {
329         struct device *smmu_dev;
330         struct device_node *np;
331         struct of_phandle_iterator it;
332         void *data = &it;
333         u32 *sids;
334         __be32 pci_sid;
335         int err;
336
337         np = dev_get_dev_node(dev);
338         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
339                 of_node_put(np);
340                 return -ENODEV;
341         }
342
343         it.node = np;
344         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
345                                      __find_legacy_master_phandle);
346         smmu_dev = data;
347         of_node_put(np);
348         if (err == 0)
349                 return -ENODEV;
350         if (err < 0)
351                 return err;
352
353         if (dev_is_pci(dev)) {
354                 /* "mmu-masters" assumes Stream ID == Requester ID */
355                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
356                                        &pci_sid);
357                 it.cur = &pci_sid;
358                 it.cur_count = 1;
359         }
360
361         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
362                                 &arm_smmu_ops);
363         if (err)
364                 return err;
365
366         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
367         if (!sids)
368                 return -ENOMEM;
369
370         *smmu = dev_get_drvdata(smmu_dev);
371         of_phandle_iterator_args(&it, sids, it.cur_count);
372         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
373         kfree(sids);
374         return err;
375 }
376
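/*
 * Atomically claim a free bit in [start, end) of the given bitmap (used for
 * context bank allocation), or return -ENOSPC if none is available.
 */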
377 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
378 {
379         int idx;
380
381         do {
382                 idx = find_next_zero_bit(map, end, start);
383                 if (idx == end)
384                         return -ENOSPC;
385         } while (test_and_set_bit(idx, map));
386
387         return idx;
388 }
389
390 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
391 {
392         clear_bit(idx, map);
393 }
394
395 /* Wait for any pending TLB invalidations to complete */
396 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
397                                 void __iomem *sync, void __iomem *status)
398 {
399         unsigned int spin_cnt, delay;
400
401         writel_relaxed(0, sync);
402         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
403                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
404                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
405                                 return;
406                         cpu_relax();
407                 }
408                 udelay(delay);
409         }
410         dev_err_ratelimited(smmu->dev,
411                             "TLB sync timed out -- SMMU may be deadlocked\n");
412 }
413
414 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
415 {
416         void __iomem *base = ARM_SMMU_GR0(smmu);
417         unsigned long flags;
418
419         spin_lock_irqsave(&smmu->global_sync_lock, flags);
420         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
421                             base + ARM_SMMU_GR0_sTLBGSTATUS);
422         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
423 }
424
425 static void arm_smmu_tlb_sync_context(void *cookie)
426 {
427         struct arm_smmu_domain *smmu_domain = cookie;
428         struct arm_smmu_device *smmu = smmu_domain->smmu;
429         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
430         unsigned long flags;
431
432         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
433         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
434                             base + ARM_SMMU_CB_TLBSTATUS);
435         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
436 }
437
438 static void arm_smmu_tlb_sync_vmid(void *cookie)
439 {
440         struct arm_smmu_domain *smmu_domain = cookie;
441
442         arm_smmu_tlb_sync_global(smmu_domain->smmu);
443 }
444
445 static void arm_smmu_tlb_inv_context_s1(void *cookie)
446 {
447         struct arm_smmu_domain *smmu_domain = cookie;
448         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
449         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
450
451         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
452         arm_smmu_tlb_sync_context(cookie);
453 }
454
455 static void arm_smmu_tlb_inv_context_s2(void *cookie)
456 {
457         struct arm_smmu_domain *smmu_domain = cookie;
458         struct arm_smmu_device *smmu = smmu_domain->smmu;
459         void __iomem *base = ARM_SMMU_GR0(smmu);
460
461         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
462         arm_smmu_tlb_sync_global(smmu);
463 }
464
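/*
 * Issue TLB invalidations for [iova, iova + size) at the given granule
 * without waiting for completion: by VA and ASID for stage 1, or by IPA for
 * stage 2. Completion is enforced later via the .tlb_sync callback.
 */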
465 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
466                                           size_t granule, bool leaf, void *cookie)
467 {
468         struct arm_smmu_domain *smmu_domain = cookie;
469         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
470         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
471         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
472
473         if (stage1) {
474                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
475
476                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
477                         iova &= ~12UL;
478                         iova |= cfg->asid;
479                         do {
480                                 writel_relaxed(iova, reg);
481                                 iova += granule;
482                         } while (size -= granule);
483                 } else {
484                         iova >>= 12;
485                         iova |= (u64)cfg->asid << 48;
486                         do {
487                                 writeq_relaxed(iova, reg);
488                                 iova += granule >> 12;
489                         } while (size -= granule);
490                 }
491         } else {
492                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
493                               ARM_SMMU_CB_S2_TLBIIPAS2;
494                 iova >>= 12;
495                 do {
496                         smmu_write_atomic_lq(iova, reg);
497                         iova += granule >> 12;
498                 } while (size -= granule);
499         }
500 }
501
502 /*
503  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
504  * almost negligible, but the benefit of getting the first one in as far ahead
505  * of the sync as possible is significant, hence we don't just make this a
506  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
507  */
508 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
509                                          size_t granule, bool leaf, void *cookie)
510 {
511         struct arm_smmu_domain *smmu_domain = cookie;
512         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
513
514         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
515 }
516
517 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
518         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
519         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
520         .tlb_sync       = arm_smmu_tlb_sync_context,
521 };
522
523 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
524         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
525         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
526         .tlb_sync       = arm_smmu_tlb_sync_context,
527 };
528
529 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
530         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
531         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
532         .tlb_sync       = arm_smmu_tlb_sync_vmid,
533 };
534
535 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
536 {
537         u32 fsr, fsynr;
538         unsigned long iova;
539         struct iommu_domain *domain = dev;
540         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
541         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
542         struct arm_smmu_device *smmu = smmu_domain->smmu;
543         void __iomem *cb_base;
544
545         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
546         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
547
548         if (!(fsr & FSR_FAULT))
549                 return IRQ_NONE;
550
551         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
552         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
553
554         dev_err_ratelimited(smmu->dev,
555         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
556                             fsr, iova, fsynr, cfg->cbndx);
557
558         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
559         return IRQ_HANDLED;
560 }
561
562 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
563 {
564         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
565         struct arm_smmu_device *smmu = dev;
566         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
567
568         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
569         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
570         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
571         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
572
573         if (!gfsr)
574                 return IRQ_NONE;
575
576         dev_err_ratelimited(smmu->dev,
577                 "Unexpected global fault, this could be serious\n");
578         dev_err_ratelimited(smmu->dev,
579                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
580                 gfsr, gfsynr0, gfsynr1, gfsynr2);
581
582         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
583         return IRQ_HANDLED;
584 }
585
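/*
 * Stage the TTBCR/TTBR/MAIR values derived from the io-pgtable configuration
 * into the context bank's software shadow; arm_smmu_write_context_bank()
 * commits them to hardware.
 */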
586 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
587                                        struct io_pgtable_cfg *pgtbl_cfg)
588 {
589         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
590         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
591         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
592
593         cb->cfg = cfg;
594
595         /* TTBCR */
596         if (stage1) {
597                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
598                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
599                 } else {
600                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
601                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
602                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
603                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
604                                 cb->tcr[1] |= TTBCR2_AS;
605                 }
606         } else {
607                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
608         }
609
610         /* TTBRs */
611         if (stage1) {
612                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
613                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
614                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
615                 } else {
616                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
617                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
618                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
619                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
620                 }
621         } else {
622                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
623         }
624
625         /* MAIRs (stage-1 only) */
626         if (stage1) {
627                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
628                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
629                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
630                 } else {
631                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
632                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
633                 }
634         }
635 }
636
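/*
 * Program context bank 'idx' from its software shadow: CBA2R/CBAR in GR1,
 * then the TTBCR(s), TTBRs and MAIRs, and finally SCTLR to enable
 * translation. Unassigned banks are simply left disabled.
 */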
637 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
638 {
639         u32 reg;
640         bool stage1;
641         struct arm_smmu_cb *cb = &smmu->cbs[idx];
642         struct arm_smmu_cfg *cfg = cb->cfg;
643         void __iomem *cb_base, *gr1_base;
644
645         cb_base = ARM_SMMU_CB(smmu, idx);
646
647         /* Unassigned context banks only need disabling */
648         if (!cfg) {
649                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
650                 return;
651         }
652
653         gr1_base = ARM_SMMU_GR1(smmu);
654         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
655
656         /* CBA2R */
657         if (smmu->version > ARM_SMMU_V1) {
658                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
659                         reg = CBA2R_RW64_64BIT;
660                 else
661                         reg = CBA2R_RW64_32BIT;
662                 /* 16-bit VMIDs live in CBA2R */
663                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
664                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
665
666                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
667         }
668
669         /* CBAR */
670         reg = cfg->cbar;
671         if (smmu->version < ARM_SMMU_V2)
672                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
673
674         /*
675          * Use the weakest shareability/memory types, so they are
676          * overridden by the ttbcr/pte.
677          */
678         if (stage1) {
679                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
680                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
681         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
682                 /* 8-bit VMIDs live in CBAR */
683                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
684         }
685         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
686
687         /*
688          * TTBCR
689          * We must write this before the TTBRs, since it determines the
690          * access behaviour of some fields (in particular, ASID[15:8]).
691          */
692         if (stage1 && smmu->version > ARM_SMMU_V1)
693                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
694         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
695
696         /* TTBRs */
697         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
698                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
699                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
700                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
701         } else {
702                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
703                 if (stage1)
704                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
705         }
706
707         /* MAIRs (stage-1 only) */
708         if (stage1) {
709                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
710                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
711         }
712
713         /* SCTLR */
714         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
715         if (stage1)
716                 reg |= SCTLR_S1_ASIDPNE;
717         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
718                 reg |= SCTLR_E;
719
720         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
721 }
722
723 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
724                                         struct arm_smmu_device *smmu)
725 {
726         int irq, start, ret = 0;
727         unsigned long ias, oas;
728         struct io_pgtable_ops *pgtbl_ops;
729         struct io_pgtable_cfg pgtbl_cfg;
730         enum io_pgtable_fmt fmt;
731         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
732         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
733
734         mutex_lock(&smmu_domain->init_mutex);
735         if (smmu_domain->smmu)
736                 goto out_unlock;
737
738         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
739                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
740                 smmu_domain->smmu = smmu;
741                 goto out_unlock;
742         }
743
744         /*
745          * Mapping the requested stage onto what we support is surprisingly
746          * complicated, mainly because the spec allows S1+S2 SMMUs without
747          * support for nested translation. That means we end up with the
748          * following table:
749          *
750          * Requested        Supported        Actual
751          *     S1               N              S1
752          *     S1             S1+S2            S1
753          *     S1               S2             S2
754          *     S1               S1             S1
755          *     N                N              N
756          *     N              S1+S2            S2
757          *     N                S2             S2
758          *     N                S1             S1
759          *
760          * Note that you can't actually request stage-2 mappings.
761          */
762         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
763                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
764         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
765                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
766
767         /*
768          * Choosing a suitable context format is even more fiddly. Until we
769          * grow some way for the caller to express a preference, and/or move
770          * the decision into the io-pgtable code where it arguably belongs,
771          * just aim for the closest thing to the rest of the system, and hope
772          * that the hardware isn't esoteric enough that we can't assume AArch64
773          * support to be a superset of AArch32 support...
774          */
775         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
776                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
777         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
778             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
779             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
780             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
781                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
782         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
783             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
784                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
785                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
786                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
787
788         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
789                 ret = -EINVAL;
790                 goto out_unlock;
791         }
792
793         switch (smmu_domain->stage) {
794         case ARM_SMMU_DOMAIN_S1:
795                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
796                 start = smmu->num_s2_context_banks;
797                 ias = smmu->va_size;
798                 oas = smmu->ipa_size;
799                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
800                         fmt = ARM_64_LPAE_S1;
801                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
802                         fmt = ARM_32_LPAE_S1;
803                         ias = min(ias, 32UL);
804                         oas = min(oas, 40UL);
805                 } else {
806                         fmt = ARM_V7S;
807                         ias = min(ias, 32UL);
808                         oas = min(oas, 32UL);
809                 }
810                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
811                 break;
812         case ARM_SMMU_DOMAIN_NESTED:
813                 /*
814                  * We will likely want to change this if/when KVM gets
815                  * involved.
816                  */
817         case ARM_SMMU_DOMAIN_S2:
818                 cfg->cbar = CBAR_TYPE_S2_TRANS;
819                 start = 0;
820                 ias = smmu->ipa_size;
821                 oas = smmu->pa_size;
822                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
823                         fmt = ARM_64_LPAE_S2;
824                 } else {
825                         fmt = ARM_32_LPAE_S2;
826                         ias = min(ias, 40UL);
827                         oas = min(oas, 40UL);
828                 }
829                 if (smmu->version == ARM_SMMU_V2)
830                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
831                 else
832                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
833                 break;
834         default:
835                 ret = -EINVAL;
836                 goto out_unlock;
837         }
838         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
839                                       smmu->num_context_banks);
840         if (ret < 0)
841                 goto out_unlock;
842
843         cfg->cbndx = ret;
844         if (smmu->version < ARM_SMMU_V2) {
845                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
846                 cfg->irptndx %= smmu->num_context_irqs;
847         } else {
848                 cfg->irptndx = cfg->cbndx;
849         }
850
851         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
852                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
853         else
854                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
855
856         pgtbl_cfg = (struct io_pgtable_cfg) {
857                 .pgsize_bitmap  = smmu->pgsize_bitmap,
858                 .ias            = ias,
859                 .oas            = oas,
860                 .tlb            = smmu_domain->tlb_ops,
861                 .iommu_dev      = smmu->dev,
862         };
863
864         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
865                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
866
867         smmu_domain->smmu = smmu;
868         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
869         if (!pgtbl_ops) {
870                 ret = -ENOMEM;
871                 goto out_clear_smmu;
872         }
873
874         /* Update the domain's page sizes to reflect the page table format */
875         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
876         domain->geometry.aperture_end = (1UL << ias) - 1;
877         domain->geometry.force_aperture = true;
878
879         /* Initialise the context bank with our page table cfg */
880         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
881         arm_smmu_write_context_bank(smmu, cfg->cbndx);
882
883         /*
884          * Request context fault interrupt. Do this last to avoid the
885          * handler seeing a half-initialised domain state.
886          */
887         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
888         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
889                                IRQF_SHARED, "arm-smmu-context-fault", domain);
890         if (ret < 0) {
891                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
892                         cfg->irptndx, irq);
893                 cfg->irptndx = INVALID_IRPTNDX;
894         }
895
896         mutex_unlock(&smmu_domain->init_mutex);
897
898         /* Publish page table ops for map/unmap */
899         smmu_domain->pgtbl_ops = pgtbl_ops;
900         return 0;
901
902 out_clear_smmu:
903         smmu_domain->smmu = NULL;
904 out_unlock:
905         mutex_unlock(&smmu_domain->init_mutex);
906         return ret;
907 }
908
909 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
910 {
911         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
912         struct arm_smmu_device *smmu = smmu_domain->smmu;
913         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
914         int irq;
915
916         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
917                 return;
918
919         /*
920          * Disable the context bank and free the page tables before
921          * releasing the context bank itself.
922          */
923         smmu->cbs[cfg->cbndx].cfg = NULL;
924         arm_smmu_write_context_bank(smmu, cfg->cbndx);
925
926         if (cfg->irptndx != INVALID_IRPTNDX) {
927                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
928                 devm_free_irq(smmu->dev, irq, domain);
929         }
930
931         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
932         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
933 }
934
935 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
936 {
937         struct arm_smmu_domain *smmu_domain;
938
939         if (type != IOMMU_DOMAIN_UNMANAGED &&
940             type != IOMMU_DOMAIN_DMA &&
941             type != IOMMU_DOMAIN_IDENTITY)
942                 return NULL;
943         /*
944          * Allocate the domain and initialise some of its data structures.
945          * We can't really do anything meaningful until we've added a
946          * master.
947          */
948         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
949         if (!smmu_domain)
950                 return NULL;
951
952         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
953             iommu_get_dma_cookie(&smmu_domain->domain))) {
954                 kfree(smmu_domain);
955                 return NULL;
956         }
957
958         mutex_init(&smmu_domain->init_mutex);
959         spin_lock_init(&smmu_domain->cb_lock);
960
961         return &smmu_domain->domain;
962 }
963
964 static void arm_smmu_domain_free(struct iommu_domain *domain)
965 {
966         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
967
968         /*
969          * Free the domain resources. We assume that all devices have
970          * already been detached.
971          */
972         iommu_put_dma_cookie(domain);
973         arm_smmu_destroy_domain_context(domain);
974         kfree(smmu_domain);
975 }
976
977 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
978 {
979         struct arm_smmu_smr *smr = smmu->smrs + idx;
980         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
981
982         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
983                 reg |= SMR_VALID;
984         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
985 }
986
987 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
988 {
989         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
990         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
991                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
992                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
993
994         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
995             smmu->smrs[idx].valid)
996                 reg |= S2CR_EXIDVALID;
997         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
998 }
999
1000 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1001 {
1002         arm_smmu_write_s2cr(smmu, idx);
1003         if (smmu->smrs)
1004                 arm_smmu_write_smr(smmu, idx);
1005 }
1006
1007 /*
1008  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1009  * should be called after sCR0 is written.
1010  */
1011 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1012 {
1013         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1014         u32 smr;
1015
1016         if (!smmu->smrs)
1017                 return;
1018
1019         /*
1020          * SMR.ID bits may not be preserved if the corresponding MASK
1021          * bits are set, so check each one separately. We can reject
1022          * masters later if they try to claim IDs outside these masks.
1023          */
1024         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1025         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1026         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1027         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1028
1029         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1030         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1031         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1032         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1033 }
1034
1035 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1036 {
1037         struct arm_smmu_smr *smrs = smmu->smrs;
1038         int i, free_idx = -ENOSPC;
1039
1040         /* Stream indexing is blissfully easy */
1041         if (!smrs)
1042                 return id;
1043
1044         /* Validating SMRs is... less so */
1045         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1046                 if (!smrs[i].valid) {
1047                         /*
1048                          * Note the first free entry we come across, which
1049                          * we'll claim in the end if nothing else matches.
1050                          */
1051                         if (free_idx < 0)
1052                                 free_idx = i;
1053                         continue;
1054                 }
1055                 /*
1056                  * If the new entry is _entirely_ matched by an existing entry,
1057                  * then reuse that, with the guarantee that there also cannot
1058                  * be any subsequent conflicting entries. In normal use we'd
1059                  * expect simply identical entries for this case, but there's
1060                  * no harm in accommodating the generalisation.
1061                  */
1062                 if ((mask & smrs[i].mask) == mask &&
1063                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1064                         return i;
1065                 /*
1066                  * If the new entry has any other overlap with an existing one,
1067                  * though, then there always exists at least one stream ID
1068                  * which would cause a conflict, and we can't allow that risk.
1069                  */
1070                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1071                         return -EINVAL;
1072         }
1073
1074         return free_idx;
1075 }
1076
1077 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1078 {
1079         if (--smmu->s2crs[idx].count)
1080                 return false;
1081
1082         smmu->s2crs[idx] = s2cr_init_val;
1083         if (smmu->smrs)
1084                 smmu->smrs[idx].valid = false;
1085
1086         return true;
1087 }
1088
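/*
 * Allocate stream map entries (SMR/S2CR pairs) for every stream ID of the
 * master, reusing any existing compatible entry, then program the hardware
 * once an IOMMU group has been obtained. On failure, any entries claimed so
 * far are released again.
 */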
1089 static int arm_smmu_master_alloc_smes(struct device *dev)
1090 {
1091         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1092         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1093         struct arm_smmu_device *smmu = cfg->smmu;
1094         struct arm_smmu_smr *smrs = smmu->smrs;
1095         struct iommu_group *group;
1096         int i, idx, ret;
1097
1098         mutex_lock(&smmu->stream_map_mutex);
1099         /* Figure out a viable stream map entry allocation */
1100         for_each_cfg_sme(fwspec, i, idx) {
1101                 u16 sid = fwspec->ids[i];
1102                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1103
1104                 if (idx != INVALID_SMENDX) {
1105                         ret = -EEXIST;
1106                         goto out_err;
1107                 }
1108
1109                 ret = arm_smmu_find_sme(smmu, sid, mask);
1110                 if (ret < 0)
1111                         goto out_err;
1112
1113                 idx = ret;
1114                 if (smrs && smmu->s2crs[idx].count == 0) {
1115                         smrs[idx].id = sid;
1116                         smrs[idx].mask = mask;
1117                         smrs[idx].valid = true;
1118                 }
1119                 smmu->s2crs[idx].count++;
1120                 cfg->smendx[i] = (s16)idx;
1121         }
1122
1123         group = iommu_group_get_for_dev(dev);
1124         if (!group)
1125                 group = ERR_PTR(-ENOMEM);
1126         if (IS_ERR(group)) {
1127                 ret = PTR_ERR(group);
1128                 goto out_err;
1129         }
1130         iommu_group_put(group);
1131
1132         /* It worked! Now, poke the actual hardware */
1133         for_each_cfg_sme(fwspec, i, idx) {
1134                 arm_smmu_write_sme(smmu, idx);
1135                 smmu->s2crs[idx].group = group;
1136         }
1137
1138         mutex_unlock(&smmu->stream_map_mutex);
1139         return 0;
1140
1141 out_err:
1142         while (i--) {
1143                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1144                 cfg->smendx[i] = INVALID_SMENDX;
1145         }
1146         mutex_unlock(&smmu->stream_map_mutex);
1147         return ret;
1148 }
1149
1150 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1151 {
1152         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1153         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1154         int i, idx;
1155
1156         mutex_lock(&smmu->stream_map_mutex);
1157         for_each_cfg_sme(fwspec, i, idx) {
1158                 if (arm_smmu_free_sme(smmu, idx))
1159                         arm_smmu_write_sme(smmu, idx);
1160                 cfg->smendx[i] = INVALID_SMENDX;
1161         }
1162         mutex_unlock(&smmu->stream_map_mutex);
1163 }
1164
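/*
 * Point each of the master's S2CRs at the domain's context bank (or set them
 * to bypass for an identity domain), skipping entries that already match.
 */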
1165 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1166                                       struct iommu_fwspec *fwspec)
1167 {
1168         struct arm_smmu_device *smmu = smmu_domain->smmu;
1169         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1170         u8 cbndx = smmu_domain->cfg.cbndx;
1171         enum arm_smmu_s2cr_type type;
1172         int i, idx;
1173
1174         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1175                 type = S2CR_TYPE_BYPASS;
1176         else
1177                 type = S2CR_TYPE_TRANS;
1178
1179         for_each_cfg_sme(fwspec, i, idx) {
1180                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1181                         continue;
1182
1183                 s2cr[idx].type = type;
1184                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1185                 s2cr[idx].cbndx = cbndx;
1186                 arm_smmu_write_s2cr(smmu, idx);
1187         }
1188         return 0;
1189 }
1190
1191 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1192 {
1193         int ret;
1194         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1195         struct arm_smmu_device *smmu;
1196         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1197
1198         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1199                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1200                 return -ENXIO;
1201         }
1202
1203         /*
1204          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1205          * domains between of_xlate() and add_device() - we have no way to cope
1206          * with that, so until ARM gets converted to rely on groups and default
1207          * domains, just say no (but more politely than by dereferencing NULL).
1208          * This should be at least a WARN_ON once that's sorted.
1209          */
1210         if (!fwspec->iommu_priv)
1211                 return -ENODEV;
1212
1213         smmu = fwspec_smmu(fwspec);
1214         /* Ensure that the domain is finalised */
1215         ret = arm_smmu_init_domain_context(domain, smmu);
1216         if (ret < 0)
1217                 return ret;
1218
1219         /*
1220          * Sanity check the domain. We don't support domains across
1221          * different SMMUs.
1222          */
1223         if (smmu_domain->smmu != smmu) {
1224                 dev_err(dev,
1225                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1226                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1227                 return -EINVAL;
1228         }
1229
1230         /* Looks ok, so add the device to the domain */
1231         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1232 }
1233
1234 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1235                         phys_addr_t paddr, size_t size, int prot)
1236 {
1237         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1238
1239         if (!ops)
1240                 return -ENODEV;
1241
1242         return ops->map(ops, iova, paddr, size, prot);
1243 }
1244
1245 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1246                              size_t size)
1247 {
1248         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1249
1250         if (!ops)
1251                 return 0;
1252
1253         return ops->unmap(ops, iova, size);
1254 }
1255
1256 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1257 {
1258         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1259
1260         if (smmu_domain->tlb_ops)
1261                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1262 }
1263
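/*
 * Translate an IOVA using the hardware ATS1PR address translation operation,
 * falling back to a software page table walk if the ATSR poll times out.
 */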
1264 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1265                                               dma_addr_t iova)
1266 {
1267         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1268         struct arm_smmu_device *smmu = smmu_domain->smmu;
1269         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1270         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1271         struct device *dev = smmu->dev;
1272         void __iomem *cb_base;
1273         u32 tmp;
1274         u64 phys;
1275         unsigned long va, flags;
1276
1277         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1278
1279         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1280         /* ATS1 registers can only be written atomically */
1281         va = iova & ~0xfffUL;
1282         if (smmu->version == ARM_SMMU_V2)
1283                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1284         else /* Register is only 32-bit in v1 */
1285                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1286
1287         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1288                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1289                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1290                 dev_err(dev,
1291                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1292                         &iova);
1293                 return ops->iova_to_phys(ops, iova);
1294         }
1295
1296         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1297         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1298         if (phys & CB_PAR_F) {
1299                 dev_err(dev, "translation fault!\n");
1300                 dev_err(dev, "PAR = 0x%llx\n", phys);
1301                 return 0;
1302         }
1303
1304         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1305 }
1306
1307 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1308                                         dma_addr_t iova)
1309 {
1310         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1311         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1312
1313         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1314                 return iova;
1315
1316         if (!ops)
1317                 return 0;
1318
1319         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1320                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1321                 return arm_smmu_iova_to_phys_hard(domain, iova);
1322
1323         return ops->iova_to_phys(ops, iova);
1324 }
1325
1326 static bool arm_smmu_capable(enum iommu_cap cap)
1327 {
1328         switch (cap) {
1329         case IOMMU_CAP_CACHE_COHERENCY:
1330                 /*
1331                  * Return true here as the SMMU can always send out coherent
1332                  * requests.
1333                  */
1334                 return true;
1335         case IOMMU_CAP_NOEXEC:
1336                 return true;
1337         default:
1338                 return false;
1339         }
1340 }
1341
1342 static int arm_smmu_match_node(struct device *dev, void *data)
1343 {
1344         return dev->fwnode == data;
1345 }
1346
1347 static
1348 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1349 {
1350         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1351                                                 fwnode, arm_smmu_match_node);
1352         put_device(dev);
1353         return dev ? dev_get_drvdata(dev) : NULL;
1354 }
1355
1356 static int arm_smmu_add_device(struct device *dev)
1357 {
1358         struct arm_smmu_device *smmu;
1359         struct arm_smmu_master_cfg *cfg;
1360         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1361         int i, ret;
1362
1363         if (using_legacy_binding) {
1364                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1365
1366                 /*
1367                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1368                  * will allocate/initialise a new one. Thus we need to update fwspec for
1369                  * later use.
1370                  */
1371                 fwspec = dev->iommu_fwspec;
1372                 if (ret)
1373                         goto out_free;
1374         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1375                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1376         } else {
1377                 return -ENODEV;
1378         }
1379
1380         ret = -EINVAL;
1381         for (i = 0; i < fwspec->num_ids; i++) {
1382                 u16 sid = fwspec->ids[i];
1383                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1384
1385                 if (sid & ~smmu->streamid_mask) {
1386                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1387                                 sid, smmu->streamid_mask);
1388                         goto out_free;
1389                 }
1390                 if (mask & ~smmu->smr_mask_mask) {
1391                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1392                                 mask, smmu->smr_mask_mask);
1393                         goto out_free;
1394                 }
1395         }
1396
1397         ret = -ENOMEM;
1398         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1399                       GFP_KERNEL);
1400         if (!cfg)
1401                 goto out_free;
1402
1403         cfg->smmu = smmu;
1404         fwspec->iommu_priv = cfg;
1405         while (i--)
1406                 cfg->smendx[i] = INVALID_SMENDX;
1407
1408         ret = arm_smmu_master_alloc_smes(dev);
1409         if (ret)
1410                 goto out_cfg_free;
1411
1412         iommu_device_link(&smmu->iommu, dev);
1413
1414         return 0;
1415
1416 out_cfg_free:
1417         kfree(cfg);
1418 out_free:
1419         iommu_fwspec_free(dev);
1420         return ret;
1421 }
1422
1423 static void arm_smmu_remove_device(struct device *dev)
1424 {
1425         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1426         struct arm_smmu_master_cfg *cfg;
1427         struct arm_smmu_device *smmu;
1428
1429
1430         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1431                 return;
1432
1433         cfg  = fwspec->iommu_priv;
1434         smmu = cfg->smmu;
1435
1436         iommu_device_unlink(&smmu->iommu, dev);
1437         arm_smmu_master_free_smes(fwspec);
1438         iommu_group_remove_device(dev);
1439         kfree(fwspec->iommu_priv);
1440         iommu_fwspec_free(dev);
1441 }
1442
1443 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1444 {
1445         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1446         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1447         struct iommu_group *group = NULL;
1448         int i, idx;
1449
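        /*
         * If any of this device's SMEs already belong to a group (i.e. an
         * S2CR is shared with another master), join that group; stream IDs
         * spanning two different existing groups cannot be represented and
         * are rejected.
         */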
1450         for_each_cfg_sme(fwspec, i, idx) {
1451                 if (group && smmu->s2crs[idx].group &&
1452                     group != smmu->s2crs[idx].group)
1453                         return ERR_PTR(-EINVAL);
1454
1455                 group = smmu->s2crs[idx].group;
1456         }
1457
1458         if (group)
1459                 return iommu_group_ref_get(group);
1460
1461         if (dev_is_pci(dev))
1462                 group = pci_device_group(dev);
1463         else if (dev_is_fsl_mc(dev))
1464                 group = fsl_mc_device_group(dev);
1465         else
1466                 group = generic_device_group(dev);
1467
1468         return group;
1469 }
1470
1471 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1472                                     enum iommu_attr attr, void *data)
1473 {
1474         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1475
1476         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1477                 return -EINVAL;
1478
1479         switch (attr) {
1480         case DOMAIN_ATTR_NESTING:
1481                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1482                 return 0;
1483         default:
1484                 return -ENODEV;
1485         }
1486 }
1487
1488 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1489                                     enum iommu_attr attr, void *data)
1490 {
1491         int ret = 0;
1492         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1493
1494         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1495                 return -EINVAL;
1496
1497         mutex_lock(&smmu_domain->init_mutex);
1498
1499         switch (attr) {
1500         case DOMAIN_ATTR_NESTING:
1501                 if (smmu_domain->smmu) {
1502                         ret = -EPERM;
1503                         goto out_unlock;
1504                 }
1505
1506                 if (*(int *)data)
1507                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1508                 else
1509                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1510
1511                 break;
1512         default:
1513                 ret = -ENODEV;
1514         }
1515
1516 out_unlock:
1517         mutex_unlock(&smmu_domain->init_mutex);
1518         return ret;
1519 }
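/*
 * Illustrative (hypothetical) caller usage: DOMAIN_ATTR_NESTING must be set
 * on an unmanaged domain *before* the first device is attached, since
 * arm_smmu_domain_set_attr() returns -EPERM once smmu_domain->smmu is set.
 * A VFIO-style user might do something roughly like:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	int nesting = 1;
 *
 *	iommu_domain_set_attr(dom, DOMAIN_ATTR_NESTING, &nesting);
 *	iommu_attach_group(dom, group);	// stage selection is now fixed
 */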
1520
1521 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1522 {
1523         u32 mask, fwid = 0;
1524
1525         if (args->args_count > 0)
1526                 fwid |= (u16)args->args[0];
1527
1528         if (args->args_count > 1)
1529                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1530         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1531                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1532
1533         return iommu_fwspec_add_ids(dev, &fwid, 1);
1534 }
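/*
 * Worked example of the generic binding above (values are illustrative):
 * a DT consumer entry of
 *
 *	iommus = <&smmu 0x400 0x7f80>;
 *
 * yields fwid = 0x400 | (0x7f80 << SMR_MASK_SHIFT) = 0x7f800400, i.e. the
 * stream ID in bits [15:0] and the SMR mask in bits [31:16], which is the
 * encoding that arm_smmu_add_device() unpacks later on.
 */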
1535
1536 static void arm_smmu_get_resv_regions(struct device *dev,
1537                                       struct list_head *head)
1538 {
1539         struct iommu_resv_region *region;
1540         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1541
1542         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1543                                          prot, IOMMU_RESV_SW_MSI);
1544         if (!region)
1545                 return;
1546
1547         list_add_tail(&region->list, head);
1548
1549         iommu_dma_get_resv_regions(dev, head);
1550 }
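/*
 * The IOMMU_RESV_SW_MSI region above is a software-chosen IOVA window
 * (MSI_IOVA_BASE/MSI_IOVA_LENGTH) rather than a hardware restriction: it is
 * reported via iommu_get_resv_regions() so that callers such as VFIO know
 * where MSI doorbells will be mapped and can keep other DMA out of that
 * window.
 */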
1551
1552 static void arm_smmu_put_resv_regions(struct device *dev,
1553                                       struct list_head *head)
1554 {
1555         struct iommu_resv_region *entry, *next;
1556
1557         list_for_each_entry_safe(entry, next, head, list)
1558                 kfree(entry);
1559 }
1560
1561 static struct iommu_ops arm_smmu_ops = {
1562         .capable                = arm_smmu_capable,
1563         .domain_alloc           = arm_smmu_domain_alloc,
1564         .domain_free            = arm_smmu_domain_free,
1565         .attach_dev             = arm_smmu_attach_dev,
1566         .map                    = arm_smmu_map,
1567         .unmap                  = arm_smmu_unmap,
1568         .flush_iotlb_all        = arm_smmu_iotlb_sync,
1569         .iotlb_sync             = arm_smmu_iotlb_sync,
1570         .iova_to_phys           = arm_smmu_iova_to_phys,
1571         .add_device             = arm_smmu_add_device,
1572         .remove_device          = arm_smmu_remove_device,
1573         .device_group           = arm_smmu_device_group,
1574         .domain_get_attr        = arm_smmu_domain_get_attr,
1575         .domain_set_attr        = arm_smmu_domain_set_attr,
1576         .of_xlate               = arm_smmu_of_xlate,
1577         .get_resv_regions       = arm_smmu_get_resv_regions,
1578         .put_resv_regions       = arm_smmu_put_resv_regions,
1579         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1580 };
1581
1582 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1583 {
1584         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1585         int i;
1586         u32 reg, major;
1587
1588         /* clear global FSR */
1589         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1590         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1591
1592         /*
1593          * Reset stream mapping groups: Initial values mark all SMRn as
1594          * invalid and all S2CRn as bypass unless overridden.
1595          */
1596         for (i = 0; i < smmu->num_mapping_groups; ++i)
1597                 arm_smmu_write_sme(smmu, i);
1598
1599         if (smmu->model == ARM_MMU500) {
1600                 /*
1601                  * Before clearing ARM_MMU500_ACTLR_CPRE, we need to
1602                  * clear the CACHE_LOCK bit of ACR first; note that
1603                  * CACHE_LOCK is only present in MMU-500 r2 onwards.
1604                  */
1605                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1606                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1607                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1608                 if (major >= 2)
1609                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1610                 /*
1611                  * Allow unmatched Stream IDs to allocate bypass
1612                  * TLB entries for reduced latency.
1613                  */
1614                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1615                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1616         }
1617
1618         /* Make sure all context banks are disabled and clear CB_FSR */
1619         for (i = 0; i < smmu->num_context_banks; ++i) {
1620                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1621
1622                 arm_smmu_write_context_bank(smmu, i);
1623                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1624                 /*
1625                  * Disable MMU-500's not-particularly-beneficial next-page
1626                  * prefetcher for the sake of errata #841119 and #826419.
1627                  */
1628                 if (smmu->model == ARM_MMU500) {
1629                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1630                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1631                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1632                 }
1633         }
1634
1635         /* Invalidate the TLB, just in case */
1636         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1637         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1638
1639         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1640
1641         /* Enable fault reporting */
1642         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1643
1644         /* Disable TLB broadcasting. */
1645         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1646
1647         /* Enable client access, handling unmatched streams as appropriate */
1648         reg &= ~sCR0_CLIENTPD;
1649         if (disable_bypass)
1650                 reg |= sCR0_USFCFG;
1651         else
1652                 reg &= ~sCR0_USFCFG;
1653
1654         /* Disable forced broadcasting */
1655         reg &= ~sCR0_FB;
1656
1657         /* Don't upgrade barriers */
1658         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1659
1660         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1661                 reg |= sCR0_VMID16EN;
1662
1663         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1664                 reg |= sCR0_EXIDENABLE;
1665
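        /*
         * The global TLB sync below makes sure the invalidations issued
         * above have completed before sCR0 (re-)enables the SMMU.
         */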
1666         /* Push the button */
1667         arm_smmu_tlb_sync_global(smmu);
1668         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1669 }
1670
1671 static int arm_smmu_id_size_to_bits(int size)
1672 {
1673         switch (size) {
1674         case 0:
1675                 return 32;
1676         case 1:
1677                 return 36;
1678         case 2:
1679                 return 40;
1680         case 3:
1681                 return 42;
1682         case 4:
1683                 return 44;
1684         case 5:
1685         default:
1686                 return 48;
1687         }
1688 }
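/*
 * For example, an ID2 IAS/OAS/UBS field value of 2 decodes to a 40-bit
 * address size, and any larger encoding is clamped to the 48-bit maximum
 * handled by this driver.
 */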
1689
1690 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1691 {
1692         unsigned long size;
1693         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1694         u32 id;
1695         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1696         int i;
1697
1698         dev_notice(smmu->dev, "probing hardware configuration...\n");
1699         dev_notice(smmu->dev, "SMMUv%d with:\n",
1700                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1701
1702         /* ID0 */
1703         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1704
1705         /* Restrict available stages based on module parameter */
1706         if (force_stage == 1)
1707                 id &= ~(ID0_S2TS | ID0_NTS);
1708         else if (force_stage == 2)
1709                 id &= ~(ID0_S1TS | ID0_NTS);
1710
1711         if (id & ID0_S1TS) {
1712                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1713                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1714         }
1715
1716         if (id & ID0_S2TS) {
1717                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1718                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1719         }
1720
1721         if (id & ID0_NTS) {
1722                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1723                 dev_notice(smmu->dev, "\tnested translation\n");
1724         }
1725
1726         if (!(smmu->features &
1727                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1728                 dev_err(smmu->dev, "\tno translation support!\n");
1729                 return -ENODEV;
1730         }
1731
1732         if ((id & ID0_S1TS) &&
1733                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1734                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1735                 dev_notice(smmu->dev, "\taddress translation ops\n");
1736         }
1737
1738         /*
1739          * In order for DMA API calls to work properly, we must defer to what
1740          * the FW says about coherency, regardless of what the hardware claims.
1741          * Fortunately, this also opens up a workaround for systems where the
1742          * ID register value has ended up configured incorrectly.
1743          */
1744         cttw_reg = !!(id & ID0_CTTW);
1745         if (cttw_fw || cttw_reg)
1746                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1747                            cttw_fw ? "" : "non-");
1748         if (cttw_fw != cttw_reg)
1749                 dev_notice(smmu->dev,
1750                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1751
1752         /* Max. number of entries we have for stream matching/indexing */
1753         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1754                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1755                 size = 1 << 16;
1756         } else {
1757                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1758         }
1759         smmu->streamid_mask = size - 1;
1760         if (id & ID0_SMS) {
1761                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1762                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1763                 if (size == 0) {
1764                         dev_err(smmu->dev,
1765                                 "stream-matching supported, but no SMRs present!\n");
1766                         return -ENODEV;
1767                 }
1768
1769                 /* Zero-initialised to mark as invalid */
1770                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1771                                           GFP_KERNEL);
1772                 if (!smmu->smrs)
1773                         return -ENOMEM;
1774
1775                 dev_notice(smmu->dev,
1776                            "\tstream matching with %lu register groups\n", size);
1777         }
1778         /* s2cr->type == 0 means translation, so initialise explicitly */
1779         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1780                                          GFP_KERNEL);
1781         if (!smmu->s2crs)
1782                 return -ENOMEM;
1783         for (i = 0; i < size; i++)
1784                 smmu->s2crs[i] = s2cr_init_val;
1785
1786         smmu->num_mapping_groups = size;
1787         mutex_init(&smmu->stream_map_mutex);
1788         spin_lock_init(&smmu->global_sync_lock);
1789
1790         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1791                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1792                 if (!(id & ID0_PTFS_NO_AARCH32S))
1793                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1794         }
1795
1796         /* ID1 */
1797         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1798         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1799
1800         /* Check for size mismatch of SMMU address space from mapped region */
1801         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1802         size <<= smmu->pgshift;
1803         if (smmu->cb_base != gr0_base + size)
1804                 dev_warn(smmu->dev,
1805                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1806                         size * 2, (smmu->cb_base - gr0_base) * 2);
1807
1808         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1809         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1810         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1811                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1812                 return -ENODEV;
1813         }
1814         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1815                    smmu->num_context_banks, smmu->num_s2_context_banks);
1816         /*
1817          * Cavium CN88xx erratum #27704.
1818          * Ensure ASID and VMID allocation is unique across all SMMUs in
1819          * the system.
1820          */
1821         if (smmu->model == CAVIUM_SMMUV2) {
1822                 smmu->cavium_id_base =
1823                         atomic_add_return(smmu->num_context_banks,
1824                                           &cavium_smmu_context_count);
1825                 smmu->cavium_id_base -= smmu->num_context_banks;
1826                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1827         }
1828         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1829                                  sizeof(*smmu->cbs), GFP_KERNEL);
1830         if (!smmu->cbs)
1831                 return -ENOMEM;
1832
1833         /* ID2 */
1834         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1835         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1836         smmu->ipa_size = size;
1837
1838         /* The output mask is also applied for bypass */
1839         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1840         smmu->pa_size = size;
1841
1842         if (id & ID2_VMID16)
1843                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1844
1845         /*
1846          * What the page table walker can address actually depends on which
1847          * descriptor format is in use, but since a) we don't know that yet,
1848          * and b) it can vary per context bank, this will have to do...
1849          */
1850         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1851                 dev_warn(smmu->dev,
1852                          "failed to set DMA mask for table walker\n");
1853
1854         if (smmu->version < ARM_SMMU_V2) {
1855                 smmu->va_size = smmu->ipa_size;
1856                 if (smmu->version == ARM_SMMU_V1_64K)
1857                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1858         } else {
1859                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1860                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1861                 if (id & ID2_PTFS_4K)
1862                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1863                 if (id & ID2_PTFS_16K)
1864                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1865                 if (id & ID2_PTFS_64K)
1866                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1867         }
1868
1869         /* Now we've corralled the various formats, what'll it do? */
1870         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1871                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1872         if (smmu->features &
1873             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1874                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1875         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1876                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1877         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1878                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1879
1880         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1881                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1882         else
1883                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1884         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1885                    smmu->pgsize_bitmap);
1886
1887
1888         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1889                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1890                            smmu->va_size, smmu->ipa_size);
1891
1892         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1893                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1894                            smmu->ipa_size, smmu->pa_size);
1895
1896         return 0;
1897 }
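/*
 * Worked example of the pgsize_bitmap computation above: an SMMU that only
 * reports the AArch64 4K translation granule ends up with
 *
 *	SZ_4K | SZ_2M | SZ_1G == 0x40201000
 *
 * which is exactly the "Supported page sizes" value printed at the end of
 * arm_smmu_device_cfg_probe().
 */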
1898
1899 struct arm_smmu_match_data {
1900         enum arm_smmu_arch_version version;
1901         enum arm_smmu_implementation model;
1902 };
1903
1904 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1905 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1906
1907 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1908 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1909 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1910 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1911 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1912
1913 static const struct of_device_id arm_smmu_of_match[] = {
1914         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1915         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1916         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1917         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1918         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1919         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1920         { },
1921 };
1922 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1923
1924 #ifdef CONFIG_ACPI
1925 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1926 {
1927         int ret = 0;
1928
1929         switch (model) {
1930         case ACPI_IORT_SMMU_V1:
1931         case ACPI_IORT_SMMU_CORELINK_MMU400:
1932                 smmu->version = ARM_SMMU_V1;
1933                 smmu->model = GENERIC_SMMU;
1934                 break;
1935         case ACPI_IORT_SMMU_CORELINK_MMU401:
1936                 smmu->version = ARM_SMMU_V1_64K;
1937                 smmu->model = GENERIC_SMMU;
1938                 break;
1939         case ACPI_IORT_SMMU_V2:
1940                 smmu->version = ARM_SMMU_V2;
1941                 smmu->model = GENERIC_SMMU;
1942                 break;
1943         case ACPI_IORT_SMMU_CORELINK_MMU500:
1944                 smmu->version = ARM_SMMU_V2;
1945                 smmu->model = ARM_MMU500;
1946                 break;
1947         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1948                 smmu->version = ARM_SMMU_V2;
1949                 smmu->model = CAVIUM_SMMUV2;
1950                 break;
1951         default:
1952                 ret = -ENODEV;
1953         }
1954
1955         return ret;
1956 }
1957
1958 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1959                                       struct arm_smmu_device *smmu)
1960 {
1961         struct device *dev = smmu->dev;
1962         struct acpi_iort_node *node =
1963                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1964         struct acpi_iort_smmu *iort_smmu;
1965         int ret;
1966
1967         /* Retrieve SMMUv1/SMMUv2-specific data */
1968         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1969
1970         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1971         if (ret < 0)
1972                 return ret;
1973
1974         /* Ignore the configuration access interrupt */
1975         smmu->num_global_irqs = 1;
1976
1977         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1978                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1979
1980         return 0;
1981 }
1982 #else
1983 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1984                                              struct arm_smmu_device *smmu)
1985 {
1986         return -ENODEV;
1987 }
1988 #endif
1989
1990 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1991                                     struct arm_smmu_device *smmu)
1992 {
1993         const struct arm_smmu_match_data *data;
1994         struct device *dev = &pdev->dev;
1995         bool legacy_binding;
1996
1997         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1998                                  &smmu->num_global_irqs)) {
1999                 dev_err(dev, "missing #global-interrupts property\n");
2000                 return -ENODEV;
2001         }
2002
2003         data = of_device_get_match_data(dev);
2004         smmu->version = data->version;
2005         smmu->model = data->model;
2006
2007         parse_driver_options(smmu);
2008
2009         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2010         if (legacy_binding && !using_generic_binding) {
2011                 if (!using_legacy_binding)
2012                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2013                 using_legacy_binding = true;
2014         } else if (!legacy_binding && !using_legacy_binding) {
2015                 using_generic_binding = true;
2016         } else {
2017                 dev_err(dev, "not probing due to mismatched DT properties\n");
2018                 return -ENODEV;
2019         }
2020
2021         if (of_dma_is_coherent(dev->of_node))
2022                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2023
2024         return 0;
2025 }
2026
2027 static void arm_smmu_bus_init(void)
2028 {
2029         /* Oh, for a proper bus abstraction */
2030         if (!iommu_present(&platform_bus_type))
2031                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2032 #ifdef CONFIG_ARM_AMBA
2033         if (!iommu_present(&amba_bustype))
2034                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2035 #endif
2036 #ifdef CONFIG_PCI
2037         if (!iommu_present(&pci_bus_type)) {
2038                 pci_request_acs();
2039                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2040         }
2041 #endif
2042 #ifdef CONFIG_FSL_MC_BUS
2043         if (!iommu_present(&fsl_mc_bus_type))
2044                 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2045 #endif
2046 }
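/*
 * pci_request_acs() asks the PCI core to enable ACS where it is supported,
 * so that pci_device_group() can later build isolation-based IOMMU groups
 * for the devices behind this SMMU.
 */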
2047
2048 static int arm_smmu_device_probe(struct platform_device *pdev)
2049 {
2050         struct resource *res;
2051         resource_size_t ioaddr;
2052         struct arm_smmu_device *smmu;
2053         struct device *dev = &pdev->dev;
2054         int num_irqs, i, err;
2055
2056         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2057         if (!smmu) {
2058                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2059                 return -ENOMEM;
2060         }
2061         smmu->dev = dev;
2062
2063         if (dev->of_node)
2064                 err = arm_smmu_device_dt_probe(pdev, smmu);
2065         else
2066                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2067
2068         if (err)
2069                 return err;
2070
2071         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2072         ioaddr = res->start;
2073         smmu->base = devm_ioremap_resource(dev, res);
2074         if (IS_ERR(smmu->base))
2075                 return PTR_ERR(smmu->base);
2076         smmu->cb_base = smmu->base + resource_size(res) / 2;
2077
2078         num_irqs = 0;
2079         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2080                 num_irqs++;
2081                 if (num_irqs > smmu->num_global_irqs)
2082                         smmu->num_context_irqs++;
2083         }
2084
2085         if (!smmu->num_context_irqs) {
2086                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2087                         num_irqs, smmu->num_global_irqs + 1);
2088                 return -ENODEV;
2089         }
2090
2091         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2092                                   GFP_KERNEL);
2093         if (!smmu->irqs) {
2094                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2095                 return -ENOMEM;
2096         }
2097
2098         for (i = 0; i < num_irqs; ++i) {
2099                 int irq = platform_get_irq(pdev, i);
2100
2101                 if (irq < 0) {
2102                         dev_err(dev, "failed to get irq index %d\n", i);
2103                         return -ENODEV;
2104                 }
2105                 smmu->irqs[i] = irq;
2106         }
2107
2108         err = arm_smmu_device_cfg_probe(smmu);
2109         if (err)
2110                 return err;
2111
2112         if (smmu->version == ARM_SMMU_V2) {
2113                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2114                         dev_err(dev,
2115                               "found only %d context irq(s) but %d required\n",
2116                               smmu->num_context_irqs, smmu->num_context_banks);
2117                         return -ENODEV;
2118                 }
2119
2120                 /* Ignore superfluous interrupts */
2121                 smmu->num_context_irqs = smmu->num_context_banks;
2122         }
2123
2124         for (i = 0; i < smmu->num_global_irqs; ++i) {
2125                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2126                                        arm_smmu_global_fault,
2127                                        IRQF_SHARED,
2128                                        "arm-smmu global fault",
2129                                        smmu);
2130                 if (err) {
2131                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2132                                 i, smmu->irqs[i]);
2133                         return err;
2134                 }
2135         }
2136
2137         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2138                                      "smmu.%pa", &ioaddr);
2139         if (err) {
2140                 dev_err(dev, "Failed to register iommu in sysfs\n");
2141                 return err;
2142         }
2143
2144         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2145         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2146
2147         err = iommu_device_register(&smmu->iommu);
2148         if (err) {
2149                 dev_err(dev, "Failed to register iommu\n");
2150                 return err;
2151         }
2152
2153         platform_set_drvdata(pdev, smmu);
2154         arm_smmu_device_reset(smmu);
2155         arm_smmu_test_smr_masks(smmu);
2156
2157         /*
2158          * For ACPI and generic DT bindings, an SMMU will be probed before
2159          * any device which might need it, so we want the bus ops in place
2160          * ready to handle default domain setup as soon as any SMMU exists.
2161          */
2162         if (!using_legacy_binding)
2163                 arm_smmu_bus_init();
2164
2165         return 0;
2166 }
2167
2168 /*
2169  * With the legacy DT binding in play, though, we have no guarantees about
2170  * probe order, but then we're also not doing default domains, so we can
2171  * delay setting bus ops until we're sure every possible SMMU is ready,
2172  * and that way ensure that no add_device() calls get missed.
2173  */
2174 static int arm_smmu_legacy_bus_init(void)
2175 {
2176         if (using_legacy_binding)
2177                 arm_smmu_bus_init();
2178         return 0;
2179 }
2180 device_initcall_sync(arm_smmu_legacy_bus_init);
2181
2182 static int arm_smmu_device_remove(struct platform_device *pdev)
2183 {
2184         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2185
2186         if (!smmu)
2187                 return -ENODEV;
2188
2189         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2190                 dev_err(&pdev->dev, "removing device with active domains!\n");
2191
2192         /* Turn the thing off */
2193         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2194         return 0;
2195 }
2196
2197 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2198 {
2199         arm_smmu_device_remove(pdev);
2200 }
2201
2202 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2203 {
2204         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2205
2206         arm_smmu_device_reset(smmu);
2207         return 0;
2208 }
2209
2210 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2211
2212 static struct platform_driver arm_smmu_driver = {
2213         .driver = {
2214                 .name           = "arm-smmu",
2215                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2216                 .pm             = &arm_smmu_pm_ops,
2217         },
2218         .probe  = arm_smmu_device_probe,
2219         .remove = arm_smmu_device_remove,
2220         .shutdown = arm_smmu_device_shutdown,
2221 };
2222 module_platform_driver(arm_smmu_driver);
2223
2224 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2225 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2226 MODULE_LICENSE("GPL v2");