drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
62 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
63 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
64
65 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
66 #define TLB_SPIN_COUNT                  10
67
68 /* Maximum number of context banks per SMMU */
69 #define ARM_SMMU_MAX_CBS                128
70
71 /* SMMU global address space */
72 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
73 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
74
75 /*
76  * SMMU global address space with conditional offset to access secure
77  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
78  * nsGFSYNR0: 0x450)
79  */
80 #define ARM_SMMU_GR0_NS(smmu)                                           \
81         ((smmu)->base +                                                 \
82                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
83                         ? 0x400 : 0))
84
85 /*
86  * Some 64-bit registers only make sense to write atomically, but in such
87  * cases all the data relevant to AArch32 formats lies within the lower word,
88  * therefore this actually makes more sense than it might first appear.
89  */
90 #ifdef CONFIG_64BIT
91 #define smmu_write_atomic_lq            writeq_relaxed
92 #else
93 #define smmu_write_atomic_lq            writel_relaxed
94 #endif
95
96 /* Translation context bank */
97 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
98
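/*
 * Fixed IOVA window advertised to the IOMMU core as a software-managed
 * MSI region (see arm_smmu_get_resv_regions() below).
 */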
99 #define MSI_IOVA_BASE                   0x8000000
100 #define MSI_IOVA_LENGTH                 0x100000
101
102 static int force_stage;
103 module_param(force_stage, int, S_IRUGO);
104 MODULE_PARM_DESC(force_stage,
105         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
106 static bool disable_bypass;
107 module_param(disable_bypass, bool, S_IRUGO);
108 MODULE_PARM_DESC(disable_bypass,
109         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
110
111 enum arm_smmu_arch_version {
112         ARM_SMMU_V1,
113         ARM_SMMU_V1_64K,
114         ARM_SMMU_V2,
115 };
116
117 enum arm_smmu_implementation {
118         GENERIC_SMMU,
119         ARM_MMU500,
120         CAVIUM_SMMUV2,
121 };
122
123 struct arm_smmu_s2cr {
124         struct iommu_group              *group;
125         int                             count;
126         enum arm_smmu_s2cr_type         type;
127         enum arm_smmu_s2cr_privcfg      privcfg;
128         u8                              cbndx;
129 };
130
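/*
 * Reset state for a Stream-to-Context Register entry: unused streams
 * either fault or bypass the SMMU, depending on the disable_bypass
 * module parameter above.
 */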
131 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
132         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
133 }
134
135 struct arm_smmu_smr {
136         u16                             mask;
137         u16                             id;
138         bool                            valid;
139 };
140
141 struct arm_smmu_cb {
142         u64                             ttbr[2];
143         u32                             tcr[2];
144         u32                             mair[2];
145         struct arm_smmu_cfg             *cfg;
146 };
147
148 struct arm_smmu_master_cfg {
149         struct arm_smmu_device          *smmu;
150         s16                             smendx[];
151 };
152 #define INVALID_SMENDX                  -1
153 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
154 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
155 #define fwspec_smendx(fw, i) \
156         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
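/*
 * Walk each of a master's stream IDs together with its stream map entry
 * index, e.g.:
 *
 *	for_each_cfg_sme(fwspec, i, idx)
 *		arm_smmu_write_sme(smmu, idx);
 *
 * Note the comma operator in the loop condition: idx is re-evaluated on
 * every iteration and reads INVALID_SMENDX for entries not yet allocated.
 */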
157 #define for_each_cfg_sme(fw, i, idx) \
158         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
159
160 struct arm_smmu_device {
161         struct device                   *dev;
162
163         void __iomem                    *base;
164         void __iomem                    *cb_base;
165         unsigned long                   pgshift;
166
167 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
168 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
169 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
170 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
171 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
172 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
173 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
174 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
177 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
179 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
180         u32                             features;
181
182 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
183         u32                             options;
184         enum arm_smmu_arch_version      version;
185         enum arm_smmu_implementation    model;
186
187         u32                             num_context_banks;
188         u32                             num_s2_context_banks;
189         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
190         struct arm_smmu_cb              *cbs;
191         atomic_t                        irptndx;
192
193         u32                             num_mapping_groups;
194         u16                             streamid_mask;
195         u16                             smr_mask_mask;
196         struct arm_smmu_smr             *smrs;
197         struct arm_smmu_s2cr            *s2crs;
198         struct mutex                    stream_map_mutex;
199
200         unsigned long                   va_size;
201         unsigned long                   ipa_size;
202         unsigned long                   pa_size;
203         unsigned long                   pgsize_bitmap;
204
205         u32                             num_global_irqs;
206         u32                             num_context_irqs;
207         unsigned int                    *irqs;
208
209         u32                             cavium_id_base; /* Specific to Cavium */
210
211         spinlock_t                      global_sync_lock;
212
213         /* IOMMU core code handle */
214         struct iommu_device             iommu;
215 };
216
217 enum arm_smmu_context_fmt {
218         ARM_SMMU_CTX_FMT_NONE,
219         ARM_SMMU_CTX_FMT_AARCH64,
220         ARM_SMMU_CTX_FMT_AARCH32_L,
221         ARM_SMMU_CTX_FMT_AARCH32_S,
222 };
223
224 struct arm_smmu_cfg {
225         u8                              cbndx;
226         u8                              irptndx;
227         union {
228                 u16                     asid;
229                 u16                     vmid;
230         };
231         u32                             cbar;
232         enum arm_smmu_context_fmt       fmt;
233 };
234 #define INVALID_IRPTNDX                 0xff
235
236 enum arm_smmu_domain_stage {
237         ARM_SMMU_DOMAIN_S1 = 0,
238         ARM_SMMU_DOMAIN_S2,
239         ARM_SMMU_DOMAIN_NESTED,
240         ARM_SMMU_DOMAIN_BYPASS,
241 };
242
243 struct arm_smmu_domain {
244         struct arm_smmu_device          *smmu;
245         struct io_pgtable_ops           *pgtbl_ops;
246         const struct iommu_gather_ops   *tlb_ops;
247         struct arm_smmu_cfg             cfg;
248         enum arm_smmu_domain_stage      stage;
249         struct mutex                    init_mutex; /* Protects smmu pointer */
250         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
251         struct iommu_domain             domain;
252 };
253
254 struct arm_smmu_option_prop {
255         u32 opt;
256         const char *prop;
257 };
258
259 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
260
261 static bool using_legacy_binding, using_generic_binding;
262
263 static struct arm_smmu_option_prop arm_smmu_options[] = {
264         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
265         { 0, NULL},
266 };
267
268 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
269 {
270         return container_of(dom, struct arm_smmu_domain, domain);
271 }
272
273 static void parse_driver_options(struct arm_smmu_device *smmu)
274 {
275         int i = 0;
276
277         do {
278                 if (of_property_read_bool(smmu->dev->of_node,
279                                                 arm_smmu_options[i].prop)) {
280                         smmu->options |= arm_smmu_options[i].opt;
281                         dev_notice(smmu->dev, "option %s\n",
282                                 arm_smmu_options[i].prop);
283                 }
284         } while (arm_smmu_options[++i].opt);
285 }
286
287 static struct device_node *dev_get_dev_node(struct device *dev)
288 {
289         if (dev_is_pci(dev)) {
290                 struct pci_bus *bus = to_pci_dev(dev)->bus;
291
292                 while (!pci_is_root_bus(bus))
293                         bus = bus->parent;
294                 return of_node_get(bus->bridge->parent->of_node);
295         }
296
297         return of_node_get(dev->of_node);
298 }
299
300 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
301 {
302         *((__be32 *)data) = cpu_to_be32(alias);
303         return 0; /* Continue walking */
304 }
305
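/*
 * driver_for_each_device() callback for the legacy "mmu-masters" binding:
 * scan each SMMU's master list for the node we're after and, on a match,
 * pass the SMMU's struct device back through *data and return non-zero to
 * stop the walk.
 */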
306 static int __find_legacy_master_phandle(struct device *dev, void *data)
307 {
308         struct of_phandle_iterator *it = *(void **)data;
309         struct device_node *np = it->node;
310         int err;
311
312         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
313                             "#stream-id-cells", 0)
314                 if (it->node == np) {
315                         *(void **)data = dev;
316                         return 1;
317                 }
318         it->node = np;
319         return err == -ENOENT ? 0 : err;
320 }
321
322 static struct platform_driver arm_smmu_driver;
323 static struct iommu_ops arm_smmu_ops;
324
325 static int arm_smmu_register_legacy_master(struct device *dev,
326                                            struct arm_smmu_device **smmu)
327 {
328         struct device *smmu_dev;
329         struct device_node *np;
330         struct of_phandle_iterator it;
331         void *data = &it;
332         u32 *sids;
333         __be32 pci_sid;
334         int err;
335
336         np = dev_get_dev_node(dev);
337         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
338                 of_node_put(np);
339                 return -ENODEV;
340         }
341
342         it.node = np;
343         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
344                                      __find_legacy_master_phandle);
345         smmu_dev = data;
346         of_node_put(np);
347         if (err == 0)
348                 return -ENODEV;
349         if (err < 0)
350                 return err;
351
352         if (dev_is_pci(dev)) {
353                 /* "mmu-masters" assumes Stream ID == Requester ID */
354                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
355                                        &pci_sid);
356                 it.cur = &pci_sid;
357                 it.cur_count = 1;
358         }
359
360         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
361                                 &arm_smmu_ops);
362         if (err)
363                 return err;
364
365         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
366         if (!sids)
367                 return -ENOMEM;
368
369         *smmu = dev_get_drvdata(smmu_dev);
370         of_phandle_iterator_args(&it, sids, it.cur_count);
371         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
372         kfree(sids);
373         return err;
374 }
375
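/*
 * Claim the first free bit in [start, end), retrying if another caller
 * races us to it; returns the bit index or -ENOSPC if the map is full.
 */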
376 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
377 {
378         int idx;
379
380         do {
381                 idx = find_next_zero_bit(map, end, start);
382                 if (idx == end)
383                         return -ENOSPC;
384         } while (test_and_set_bit(idx, map));
385
386         return idx;
387 }
388
389 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
390 {
391         clear_bit(idx, map);
392 }
393
394 /* Wait for any pending TLB invalidations to complete */
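/*
 * Busy-poll TLB_SPIN_COUNT times between progressively longer udelay()s,
 * giving up after roughly a second in case the SMMU has wedged.
 */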
395 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
396                                 void __iomem *sync, void __iomem *status)
397 {
398         unsigned int spin_cnt, delay;
399
400         writel_relaxed(0, sync);
401         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
402                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
403                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
404                                 return;
405                         cpu_relax();
406                 }
407                 udelay(delay);
408         }
409         dev_err_ratelimited(smmu->dev,
410                             "TLB sync timed out -- SMMU may be deadlocked\n");
411 }
412
413 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
414 {
415         void __iomem *base = ARM_SMMU_GR0(smmu);
416         unsigned long flags;
417
418         spin_lock_irqsave(&smmu->global_sync_lock, flags);
419         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
420                             base + ARM_SMMU_GR0_sTLBGSTATUS);
421         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
422 }
423
424 static void arm_smmu_tlb_sync_context(void *cookie)
425 {
426         struct arm_smmu_domain *smmu_domain = cookie;
427         struct arm_smmu_device *smmu = smmu_domain->smmu;
428         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
429         unsigned long flags;
430
431         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
432         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
433                             base + ARM_SMMU_CB_TLBSTATUS);
434         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
435 }
436
437 static void arm_smmu_tlb_sync_vmid(void *cookie)
438 {
439         struct arm_smmu_domain *smmu_domain = cookie;
440
441         arm_smmu_tlb_sync_global(smmu_domain->smmu);
442 }
443
444 static void arm_smmu_tlb_inv_context_s1(void *cookie)
445 {
446         struct arm_smmu_domain *smmu_domain = cookie;
447         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
448         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
449
450         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
451         arm_smmu_tlb_sync_context(cookie);
452 }
453
454 static void arm_smmu_tlb_inv_context_s2(void *cookie)
455 {
456         struct arm_smmu_domain *smmu_domain = cookie;
457         struct arm_smmu_device *smmu = smmu_domain->smmu;
458         void __iomem *base = ARM_SMMU_GR0(smmu);
459
460         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
461         arm_smmu_tlb_sync_global(smmu);
462 }
463
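/*
 * Issue TLB invalidation-by-VA/IPA writes for a range without waiting for
 * completion (the sync is the caller's problem). The register encodings
 * differ: stage 1 AArch32 takes the VA with the ASID in the low bits,
 * stage 1 AArch64 takes VA >> 12 with the ASID in bits [63:48], and
 * stage 2 takes IPA >> 12.
 */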
464 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
465                                           size_t granule, bool leaf, void *cookie)
466 {
467         struct arm_smmu_domain *smmu_domain = cookie;
468         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
469         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
470         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
471
472         if (stage1) {
473                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
474
475                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
476                         iova &= ~0xfffUL;
477                         iova |= cfg->asid;
478                         do {
479                                 writel_relaxed(iova, reg);
480                                 iova += granule;
481                         } while (size -= granule);
482                 } else {
483                         iova >>= 12;
484                         iova |= (u64)cfg->asid << 48;
485                         do {
486                                 writeq_relaxed(iova, reg);
487                                 iova += granule >> 12;
488                         } while (size -= granule);
489                 }
490         } else {
491                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
492                               ARM_SMMU_CB_S2_TLBIIPAS2;
493                 iova >>= 12;
494                 do {
495                         smmu_write_atomic_lq(iova, reg);
496                         iova += granule >> 12;
497                 } while (size -= granule);
498         }
499 }
500
501 /*
502  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
503  * almost negligible, but the benefit of getting the first one in as far ahead
504  * of the sync as possible is significant, hence we don't just make this a
505  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
506  */
507 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
508                                          size_t granule, bool leaf, void *cookie)
509 {
510         struct arm_smmu_domain *smmu_domain = cookie;
511         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
512
513         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
514 }
515
516 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
517         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
518         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
519         .tlb_sync       = arm_smmu_tlb_sync_context,
520 };
521
522 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
523         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
524         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
525         .tlb_sync       = arm_smmu_tlb_sync_context,
526 };
527
528 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
529         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
530         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
531         .tlb_sync       = arm_smmu_tlb_sync_vmid,
532 };
533
534 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
535 {
536         u32 fsr, fsynr;
537         unsigned long iova;
538         struct iommu_domain *domain = dev;
539         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
540         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
541         struct arm_smmu_device *smmu = smmu_domain->smmu;
542         void __iomem *cb_base;
543
544         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
545         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
546
547         if (!(fsr & FSR_FAULT))
548                 return IRQ_NONE;
549
550         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
551         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
552
553         dev_err_ratelimited(smmu->dev,
554         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
555                             fsr, iova, fsynr, cfg->cbndx);
556
557         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
558         return IRQ_HANDLED;
559 }
560
561 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
562 {
563         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
564         struct arm_smmu_device *smmu = dev;
565         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
566
567         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
568         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
569         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
570         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
571
572         if (!gfsr)
573                 return IRQ_NONE;
574
575         dev_err_ratelimited(smmu->dev,
576                 "Unexpected global fault, this could be serious\n");
577         dev_err_ratelimited(smmu->dev,
578                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
579                 gfsr, gfsynr0, gfsynr1, gfsynr2);
580
581         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
582         return IRQ_HANDLED;
583 }
584
585 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
586                                        struct io_pgtable_cfg *pgtbl_cfg)
587 {
588         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
589         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
590         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
591
592         cb->cfg = cfg;
593
594         /* TTBCR */
595         if (stage1) {
596                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
597                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
598                 } else {
599                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
600                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
601                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
602                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
603                                 cb->tcr[1] |= TTBCR2_AS;
604                 }
605         } else {
606                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
607         }
608
609         /* TTBRs */
610         if (stage1) {
611                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
612                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
613                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
614                 } else {
615                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
616                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
617                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
618                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
619                 }
620         } else {
621                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
622         }
623
624         /* MAIRs (stage-1 only) */
625         if (stage1) {
626                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
627                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
628                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
629                 } else {
630                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
631                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
632                 }
633         }
634 }
635
636 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
637 {
638         u32 reg;
639         bool stage1;
640         struct arm_smmu_cb *cb = &smmu->cbs[idx];
641         struct arm_smmu_cfg *cfg = cb->cfg;
642         void __iomem *cb_base, *gr1_base;
643
644         cb_base = ARM_SMMU_CB(smmu, idx);
645
646         /* Unassigned context banks only need disabling */
647         if (!cfg) {
648                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
649                 return;
650         }
651
652         gr1_base = ARM_SMMU_GR1(smmu);
653         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
654
655         /* CBA2R */
656         if (smmu->version > ARM_SMMU_V1) {
657                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
658                         reg = CBA2R_RW64_64BIT;
659                 else
660                         reg = CBA2R_RW64_32BIT;
661                 /* 16-bit VMIDs live in CBA2R */
662                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
663                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
664
665                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
666         }
667
668         /* CBAR */
669         reg = cfg->cbar;
670         if (smmu->version < ARM_SMMU_V2)
671                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
672
673         /*
674          * Use the weakest shareability/memory types, so they are
675          * overridden by the ttbcr/pte.
676          */
677         if (stage1) {
678                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
679                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
680         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
681                 /* 8-bit VMIDs live in CBAR */
682                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
683         }
684         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
685
686         /*
687          * TTBCR
688          * We must write this before the TTBRs, since it determines the
689          * access behaviour of some fields (in particular, ASID[15:8]).
690          */
691         if (stage1 && smmu->version > ARM_SMMU_V1)
692                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
693         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
694
695         /* TTBRs */
696         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
697                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
698                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
699                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
700         } else {
701                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
702                 if (stage1)
703                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
704         }
705
706         /* MAIRs (stage-1 only) */
707         if (stage1) {
708                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
709                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
710         }
711
712         /* SCTLR */
713         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
714         if (stage1)
715                 reg |= SCTLR_S1_ASIDPNE;
716         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
717                 reg |= SCTLR_E;
718
719         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
720 }
721
722 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
723                                         struct arm_smmu_device *smmu)
724 {
725         int irq, start, ret = 0;
726         unsigned long ias, oas;
727         struct io_pgtable_ops *pgtbl_ops;
728         struct io_pgtable_cfg pgtbl_cfg;
729         enum io_pgtable_fmt fmt;
730         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
731         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
732
733         mutex_lock(&smmu_domain->init_mutex);
734         if (smmu_domain->smmu)
735                 goto out_unlock;
736
737         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
738                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
739                 smmu_domain->smmu = smmu;
740                 goto out_unlock;
741         }
742
743         /*
744          * Mapping the requested stage onto what we support is surprisingly
745          * complicated, mainly because the spec allows S1+S2 SMMUs without
746          * support for nested translation. That means we end up with the
747          * following table:
748          *
749          * Requested        Supported        Actual
750          *     S1               N              S1
751          *     S1             S1+S2            S1
752          *     S1               S2             S2
753          *     S1               S1             S1
754          *     N                N              N
755          *     N              S1+S2            S2
756          *     N                S2             S2
757          *     N                S1             S1
758          *
759          * Note that you can't actually request stage-2 mappings.
760          */
761         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
762                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
763         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
764                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
765
766         /*
767          * Choosing a suitable context format is even more fiddly. Until we
768          * grow some way for the caller to express a preference, and/or move
769          * the decision into the io-pgtable code where it arguably belongs,
770          * just aim for the closest thing to the rest of the system, and hope
771          * that the hardware isn't esoteric enough that we can't assume AArch64
772          * support to be a superset of AArch32 support...
773          */
774         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
775                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
776         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
777             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
778             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
779             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
780                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
781         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
782             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
783                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
784                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
785                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
786
787         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
788                 ret = -EINVAL;
789                 goto out_unlock;
790         }
791
792         switch (smmu_domain->stage) {
793         case ARM_SMMU_DOMAIN_S1:
794                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
795                 start = smmu->num_s2_context_banks;
796                 ias = smmu->va_size;
797                 oas = smmu->ipa_size;
798                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
799                         fmt = ARM_64_LPAE_S1;
800                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
801                         fmt = ARM_32_LPAE_S1;
802                         ias = min(ias, 32UL);
803                         oas = min(oas, 40UL);
804                 } else {
805                         fmt = ARM_V7S;
806                         ias = min(ias, 32UL);
807                         oas = min(oas, 32UL);
808                 }
809                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
810                 break;
811         case ARM_SMMU_DOMAIN_NESTED:
812                 /*
813                  * We will likely want to change this if/when KVM gets
814                  * involved.
815                  */
816         case ARM_SMMU_DOMAIN_S2:
817                 cfg->cbar = CBAR_TYPE_S2_TRANS;
818                 start = 0;
819                 ias = smmu->ipa_size;
820                 oas = smmu->pa_size;
821                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
822                         fmt = ARM_64_LPAE_S2;
823                 } else {
824                         fmt = ARM_32_LPAE_S2;
825                         ias = min(ias, 40UL);
826                         oas = min(oas, 40UL);
827                 }
828                 if (smmu->version == ARM_SMMU_V2)
829                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
830                 else
831                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
832                 break;
833         default:
834                 ret = -EINVAL;
835                 goto out_unlock;
836         }
837         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
838                                       smmu->num_context_banks);
839         if (ret < 0)
840                 goto out_unlock;
841
842         cfg->cbndx = ret;
843         if (smmu->version < ARM_SMMU_V2) {
844                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
845                 cfg->irptndx %= smmu->num_context_irqs;
846         } else {
847                 cfg->irptndx = cfg->cbndx;
848         }
849
850         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
851                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
852         else
853                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
854
855         pgtbl_cfg = (struct io_pgtable_cfg) {
856                 .pgsize_bitmap  = smmu->pgsize_bitmap,
857                 .ias            = ias,
858                 .oas            = oas,
859                 .tlb            = smmu_domain->tlb_ops,
860                 .iommu_dev      = smmu->dev,
861         };
862
863         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
864                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
865
866         smmu_domain->smmu = smmu;
867         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
868         if (!pgtbl_ops) {
869                 ret = -ENOMEM;
870                 goto out_clear_smmu;
871         }
872
873         /* Update the domain's page sizes to reflect the page table format */
874         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
875         domain->geometry.aperture_end = (1UL << ias) - 1;
876         domain->geometry.force_aperture = true;
877
878         /* Initialise the context bank with our page table cfg */
879         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
880         arm_smmu_write_context_bank(smmu, cfg->cbndx);
881
882         /*
883          * Request context fault interrupt. Do this last to avoid the
884          * handler seeing a half-initialised domain state.
885          */
886         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
887         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
888                                IRQF_SHARED, "arm-smmu-context-fault", domain);
889         if (ret < 0) {
890                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
891                         cfg->irptndx, irq);
892                 cfg->irptndx = INVALID_IRPTNDX;
893         }
894
895         mutex_unlock(&smmu_domain->init_mutex);
896
897         /* Publish page table ops for map/unmap */
898         smmu_domain->pgtbl_ops = pgtbl_ops;
899         return 0;
900
901 out_clear_smmu:
902         smmu_domain->smmu = NULL;
903 out_unlock:
904         mutex_unlock(&smmu_domain->init_mutex);
905         return ret;
906 }
907
908 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
909 {
910         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
911         struct arm_smmu_device *smmu = smmu_domain->smmu;
912         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
913         int irq;
914
915         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
916                 return;
917
918         /*
919          * Disable the context bank and free the page tables before freeing
920          * the domain itself.
921          */
922         smmu->cbs[cfg->cbndx].cfg = NULL;
923         arm_smmu_write_context_bank(smmu, cfg->cbndx);
924
925         if (cfg->irptndx != INVALID_IRPTNDX) {
926                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
927                 devm_free_irq(smmu->dev, irq, domain);
928         }
929
930         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
931         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
932 }
933
934 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
935 {
936         struct arm_smmu_domain *smmu_domain;
937
938         if (type != IOMMU_DOMAIN_UNMANAGED &&
939             type != IOMMU_DOMAIN_DMA &&
940             type != IOMMU_DOMAIN_IDENTITY)
941                 return NULL;
942         /*
943          * Allocate the domain and initialise some of its data structures.
944          * We can't really do anything meaningful until we've added a
945          * master.
946          */
947         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
948         if (!smmu_domain)
949                 return NULL;
950
951         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
952             iommu_get_dma_cookie(&smmu_domain->domain))) {
953                 kfree(smmu_domain);
954                 return NULL;
955         }
956
957         mutex_init(&smmu_domain->init_mutex);
958         spin_lock_init(&smmu_domain->cb_lock);
959
960         return &smmu_domain->domain;
961 }
962
963 static void arm_smmu_domain_free(struct iommu_domain *domain)
964 {
965         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
966
967         /*
968          * Free the domain resources. We assume that all devices have
969          * already been detached.
970          */
971         iommu_put_dma_cookie(domain);
972         arm_smmu_destroy_domain_context(domain);
973         kfree(smmu_domain);
974 }
975
976 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
977 {
978         struct arm_smmu_smr *smr = smmu->smrs + idx;
979         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
980
981         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
982                 reg |= SMR_VALID;
983         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
984 }
985
986 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
987 {
988         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
989         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
990                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
991                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
992
993         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
994             smmu->smrs[idx].valid)
995                 reg |= S2CR_EXIDVALID;
996         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
997 }
998
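/*
 * Program a complete stream map entry: the S2CR routing first, then the
 * SMR that matches incoming stream IDs against it (stream-matching
 * SMMUs only).
 */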
999 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1000 {
1001         arm_smmu_write_s2cr(smmu, idx);
1002         if (smmu->smrs)
1003                 arm_smmu_write_smr(smmu, idx);
1004 }
1005
1006 /*
1007  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1008  * should be called after sCR0 is written.
1009  */
1010 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1011 {
1012         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1013         u32 smr;
1014
1015         if (!smmu->smrs)
1016                 return;
1017
1018         /*
1019          * SMR.ID bits may not be preserved if the corresponding MASK
1020          * bits are set, so check each one separately. We can reject
1021          * masters later if they try to claim IDs outside these masks.
1022          */
1023         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1024         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1025         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1026         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1027
1028         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1029         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1030         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1031         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1032 }
1033
1034 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1035 {
1036         struct arm_smmu_smr *smrs = smmu->smrs;
1037         int i, free_idx = -ENOSPC;
1038
1039         /* Stream indexing is blissfully easy */
1040         if (!smrs)
1041                 return id;
1042
1043         /* Validating SMRs is... less so */
1044         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1045                 if (!smrs[i].valid) {
1046                         /*
1047                          * Note the first free entry we come across, which
1048                          * we'll claim in the end if nothing else matches.
1049                          */
1050                         if (free_idx < 0)
1051                                 free_idx = i;
1052                         continue;
1053                 }
1054                 /*
1055                  * If the new entry is _entirely_ matched by an existing entry,
1056                  * then reuse that, with the guarantee that there also cannot
1057                  * be any subsequent conflicting entries. In normal use we'd
1058                  * expect simply identical entries for this case, but there's
1059                  * no harm in accommodating the generalisation.
1060                  */
1061                 if ((mask & smrs[i].mask) == mask &&
1062                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1063                         return i;
1064                 /*
1065                  * If the new entry has any other overlap with an existing one,
1066                  * though, then there always exists at least one stream ID
1067                  * which would cause a conflict, and we can't allow that risk.
1068                  */
1069                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1070                         return -EINVAL;
1071         }
1072
1073         return free_idx;
1074 }
1075
1076 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1077 {
1078         if (--smmu->s2crs[idx].count)
1079                 return false;
1080
1081         smmu->s2crs[idx] = s2cr_init_val;
1082         if (smmu->smrs)
1083                 smmu->smrs[idx].valid = false;
1084
1085         return true;
1086 }
1087
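/*
 * Reserve stream map entries for every stream ID of a master under
 * stream_map_mutex, then poke the hardware only once the IOMMU group
 * lookup has succeeded; on any failure the claimed entries are released.
 */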
1088 static int arm_smmu_master_alloc_smes(struct device *dev)
1089 {
1090         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1091         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1092         struct arm_smmu_device *smmu = cfg->smmu;
1093         struct arm_smmu_smr *smrs = smmu->smrs;
1094         struct iommu_group *group;
1095         int i, idx, ret;
1096
1097         mutex_lock(&smmu->stream_map_mutex);
1098         /* Figure out a viable stream map entry allocation */
1099         for_each_cfg_sme(fwspec, i, idx) {
1100                 u16 sid = fwspec->ids[i];
1101                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1102
1103                 if (idx != INVALID_SMENDX) {
1104                         ret = -EEXIST;
1105                         goto out_err;
1106                 }
1107
1108                 ret = arm_smmu_find_sme(smmu, sid, mask);
1109                 if (ret < 0)
1110                         goto out_err;
1111
1112                 idx = ret;
1113                 if (smrs && smmu->s2crs[idx].count == 0) {
1114                         smrs[idx].id = sid;
1115                         smrs[idx].mask = mask;
1116                         smrs[idx].valid = true;
1117                 }
1118                 smmu->s2crs[idx].count++;
1119                 cfg->smendx[i] = (s16)idx;
1120         }
1121
1122         group = iommu_group_get_for_dev(dev);
1123         if (!group)
1124                 group = ERR_PTR(-ENOMEM);
1125         if (IS_ERR(group)) {
1126                 ret = PTR_ERR(group);
1127                 goto out_err;
1128         }
1129         iommu_group_put(group);
1130
1131         /* It worked! Now, poke the actual hardware */
1132         for_each_cfg_sme(fwspec, i, idx) {
1133                 arm_smmu_write_sme(smmu, idx);
1134                 smmu->s2crs[idx].group = group;
1135         }
1136
1137         mutex_unlock(&smmu->stream_map_mutex);
1138         return 0;
1139
1140 out_err:
1141         while (i--) {
1142                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1143                 cfg->smendx[i] = INVALID_SMENDX;
1144         }
1145         mutex_unlock(&smmu->stream_map_mutex);
1146         return ret;
1147 }
1148
1149 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1150 {
1151         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1152         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1153         int i, idx;
1154
1155         mutex_lock(&smmu->stream_map_mutex);
1156         for_each_cfg_sme(fwspec, i, idx) {
1157                 if (arm_smmu_free_sme(smmu, idx))
1158                         arm_smmu_write_sme(smmu, idx);
1159                 cfg->smendx[i] = INVALID_SMENDX;
1160         }
1161         mutex_unlock(&smmu->stream_map_mutex);
1162 }
1163
1164 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1165                                       struct iommu_fwspec *fwspec)
1166 {
1167         struct arm_smmu_device *smmu = smmu_domain->smmu;
1168         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1169         u8 cbndx = smmu_domain->cfg.cbndx;
1170         enum arm_smmu_s2cr_type type;
1171         int i, idx;
1172
1173         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1174                 type = S2CR_TYPE_BYPASS;
1175         else
1176                 type = S2CR_TYPE_TRANS;
1177
1178         for_each_cfg_sme(fwspec, i, idx) {
1179                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1180                         continue;
1181
1182                 s2cr[idx].type = type;
1183                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1184                 s2cr[idx].cbndx = cbndx;
1185                 arm_smmu_write_s2cr(smmu, idx);
1186         }
1187         return 0;
1188 }
1189
1190 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1191 {
1192         int ret;
1193         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1194         struct arm_smmu_device *smmu;
1195         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1196
1197         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1198                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1199                 return -ENXIO;
1200         }
1201
1202         /*
1203          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1204          * domains between of_xlate() and add_device() - we have no way to cope
1205          * with that, so until ARM gets converted to rely on groups and default
1206          * domains, just say no (but more politely than by dereferencing NULL).
1207          * This should be at least a WARN_ON once that's sorted.
1208          */
1209         if (!fwspec->iommu_priv)
1210                 return -ENODEV;
1211
1212         smmu = fwspec_smmu(fwspec);
1213         /* Ensure that the domain is finalised */
1214         ret = arm_smmu_init_domain_context(domain, smmu);
1215         if (ret < 0)
1216                 return ret;
1217
1218         /*
1219          * Sanity check the domain. We don't support domains across
1220          * different SMMUs.
1221          */
1222         if (smmu_domain->smmu != smmu) {
1223                 dev_err(dev,
1224                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1225                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1226                 return -EINVAL;
1227         }
1228
1229         /* Looks ok, so add the device to the domain */
1230         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1231 }
1232
1233 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1234                         phys_addr_t paddr, size_t size, int prot)
1235 {
1236         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1237
1238         if (!ops)
1239                 return -ENODEV;
1240
1241         return ops->map(ops, iova, paddr, size, prot);
1242 }
1243
1244 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1245                              size_t size)
1246 {
1247         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1248
1249         if (!ops)
1250                 return 0;
1251
1252         return ops->unmap(ops, iova, size);
1253 }
1254
1255 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1256 {
1257         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1258
1259         if (smmu_domain->tlb_ops)
1260                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1261 }
1262
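/*
 * Resolve an IOVA via the hardware ATS1PR address-translation operation,
 * falling back to a software page table walk if the SMMU doesn't answer
 * within the poll timeout.
 */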
1263 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1264                                               dma_addr_t iova)
1265 {
1266         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1267         struct arm_smmu_device *smmu = smmu_domain->smmu;
1268         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1269         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1270         struct device *dev = smmu->dev;
1271         void __iomem *cb_base;
1272         u32 tmp;
1273         u64 phys;
1274         unsigned long va, flags;
1275
1276         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1277
1278         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1279         /* ATS1 registers can only be written atomically */
1280         va = iova & ~0xfffUL;
1281         if (smmu->version == ARM_SMMU_V2)
1282                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1283         else /* Register is only 32-bit in v1 */
1284                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1285
1286         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1287                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1288                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1289                 dev_err(dev,
1290                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1291                         &iova);
1292                 return ops->iova_to_phys(ops, iova);
1293         }
1294
1295         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1296         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1297         if (phys & CB_PAR_F) {
1298                 dev_err(dev, "translation fault!\n");
1299                 dev_err(dev, "PAR = 0x%llx\n", phys);
1300                 return 0;
1301         }
1302
1303         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1304 }
1305
1306 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1307                                         dma_addr_t iova)
1308 {
1309         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1310         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1311
1312         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1313                 return iova;
1314
1315         if (!ops)
1316                 return 0;
1317
1318         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1319                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1320                 return arm_smmu_iova_to_phys_hard(domain, iova);
1321
1322         return ops->iova_to_phys(ops, iova);
1323 }
1324
1325 static bool arm_smmu_capable(enum iommu_cap cap)
1326 {
1327         switch (cap) {
1328         case IOMMU_CAP_CACHE_COHERENCY:
1329                 /*
1330                  * Return true here as the SMMU can always send out coherent
1331                  * requests.
1332                  */
1333                 return true;
1334         case IOMMU_CAP_NOEXEC:
1335                 return true;
1336         default:
1337                 return false;
1338         }
1339 }
1340
1341 static int arm_smmu_match_node(struct device *dev, void *data)
1342 {
1343         return dev->fwnode == data;
1344 }
1345
1346 static
1347 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1348 {
1349         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1350                                                 fwnode, arm_smmu_match_node);
1351         put_device(dev);
1352         return dev ? dev_get_drvdata(dev) : NULL;
1353 }
1354
1355 static int arm_smmu_add_device(struct device *dev)
1356 {
1357         struct arm_smmu_device *smmu;
1358         struct arm_smmu_master_cfg *cfg;
1359         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1360         int i, ret;
1361
1362         if (using_legacy_binding) {
1363                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1364
1365                 /*
1366                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1367                  * will allocate/initialise a new one. Thus we need to update fwspec for
1368                  * later use.
1369                  */
1370                 fwspec = dev->iommu_fwspec;
1371                 if (ret)
1372                         goto out_free;
1373         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1374                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1375         } else {
1376                 return -ENODEV;
1377         }
1378
1379         ret = -EINVAL;
1380         for (i = 0; i < fwspec->num_ids; i++) {
1381                 u16 sid = fwspec->ids[i];
1382                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1383
1384                 if (sid & ~smmu->streamid_mask) {
1385                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1386                                 sid, smmu->streamid_mask);
1387                         goto out_free;
1388                 }
1389                 if (mask & ~smmu->smr_mask_mask) {
1390                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1391                                 mask, smmu->smr_mask_mask);
1392                         goto out_free;
1393                 }
1394         }
1395
1396         ret = -ENOMEM;
1397         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1398                       GFP_KERNEL);
1399         if (!cfg)
1400                 goto out_free;
1401
1402         cfg->smmu = smmu;
1403         fwspec->iommu_priv = cfg;
1404         while (i--)
1405                 cfg->smendx[i] = INVALID_SMENDX;
1406
1407         ret = arm_smmu_master_alloc_smes(dev);
1408         if (ret)
1409                 goto out_cfg_free;
1410
1411         iommu_device_link(&smmu->iommu, dev);
1412
1413         return 0;
1414
1415 out_cfg_free:
1416         kfree(cfg);
1417 out_free:
1418         iommu_fwspec_free(dev);
1419         return ret;
1420 }
1421
1422 static void arm_smmu_remove_device(struct device *dev)
1423 {
1424         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1425         struct arm_smmu_master_cfg *cfg;
1426         struct arm_smmu_device *smmu;
1427
1428
1429         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1430                 return;
1431
1432         cfg  = fwspec->iommu_priv;
1433         smmu = cfg->smmu;
1434
1435         iommu_device_unlink(&smmu->iommu, dev);
1436         arm_smmu_master_free_smes(fwspec);
1437         iommu_group_remove_device(dev);
1438         kfree(fwspec->iommu_priv);
1439         iommu_fwspec_free(dev);
1440 }
1441
1442 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1443 {
1444         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1445         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1446         struct iommu_group *group = NULL;
1447         int i, idx;
1448
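             /*
              * All of the SMEs this device uses must resolve to the same
              * group: if one of our S2CRs is already claimed by a different
              * group (i.e. another master aliases one of our stream IDs),
              * we cannot build a consistent group and must bail out.
              */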
1449         for_each_cfg_sme(fwspec, i, idx) {
1450                 if (group && smmu->s2crs[idx].group &&
1451                     group != smmu->s2crs[idx].group)
1452                         return ERR_PTR(-EINVAL);
1453
1454                 group = smmu->s2crs[idx].group;
1455         }
1456
1457         if (group)
1458                 return iommu_group_ref_get(group);
1459
1460         if (dev_is_pci(dev))
1461                 group = pci_device_group(dev);
1462         else
1463                 group = generic_device_group(dev);
1464
1465         return group;
1466 }
1467
1468 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1469                                     enum iommu_attr attr, void *data)
1470 {
1471         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1472
1473         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1474                 return -EINVAL;
1475
1476         switch (attr) {
1477         case DOMAIN_ATTR_NESTING:
1478                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1479                 return 0;
1480         default:
1481                 return -ENODEV;
1482         }
1483 }
1484
1485 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1486                                     enum iommu_attr attr, void *data)
1487 {
1488         int ret = 0;
1489         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1490
1491         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1492                 return -EINVAL;
1493
1494         mutex_lock(&smmu_domain->init_mutex);
1495
1496         switch (attr) {
1497         case DOMAIN_ATTR_NESTING:
1498                 if (smmu_domain->smmu) {
1499                         ret = -EPERM;
1500                         goto out_unlock;
1501                 }
1502
1503                 if (*(int *)data)
1504                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1505                 else
1506                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1507
1508                 break;
1509         default:
1510                 ret = -ENODEV;
1511         }
1512
1513 out_unlock:
1514         mutex_unlock(&smmu_domain->init_mutex);
1515         return ret;
1516 }
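     /*
      * Illustrative usage of the DOMAIN_ATTR_NESTING attribute handled above
      * (not part of this driver): a caller such as VFIO can request nested
      * translation before the first device is attached, e.g.
      *
      *     int nesting = 1;
      *     iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
      *
      * after which attaching a device will use a stage-2 (nested) context
      * bank where the hardware supports it.
      */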
1517
1518 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1519 {
1520         u32 mask, fwid = 0;
1521
1522         if (args->args_count > 0)
1523                 fwid |= (u16)args->args[0];
1524
1525         if (args->args_count > 1)
1526                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1527         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1528                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1529
1530         return iommu_fwspec_add_ids(dev, &fwid, 1);
1531 }
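     /*
      * Illustrative example (values invented): given a generic binding such as
      *
      *     iommus = <&smmu 0x100>;
      *
      * plus an optional stream-match-mask = <0x7f80> property on the SMMU node,
      * arm_smmu_of_xlate() above packs the Stream ID into fwid[15:0] and the
      * SMR mask into fwid[31:16] (SMR_MASK_SHIFT), so a single fwspec ID
      * carries both values for later unpacking in arm_smmu_add_device().
      */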
1532
1533 static void arm_smmu_get_resv_regions(struct device *dev,
1534                                       struct list_head *head)
1535 {
1536         struct iommu_resv_region *region;
1537         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1538
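             /*
              * Advertise a software-managed MSI window so that the DMA layer
              * has a fixed IOVA range in which to map MSI doorbell pages for
              * devices translated by this SMMU.
              */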
1539         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1540                                          prot, IOMMU_RESV_SW_MSI);
1541         if (!region)
1542                 return;
1543
1544         list_add_tail(&region->list, head);
1545
1546         iommu_dma_get_resv_regions(dev, head);
1547 }
1548
1549 static void arm_smmu_put_resv_regions(struct device *dev,
1550                                       struct list_head *head)
1551 {
1552         struct iommu_resv_region *entry, *next;
1553
1554         list_for_each_entry_safe(entry, next, head, list)
1555                 kfree(entry);
1556 }
1557
1558 static struct iommu_ops arm_smmu_ops = {
1559         .capable                = arm_smmu_capable,
1560         .domain_alloc           = arm_smmu_domain_alloc,
1561         .domain_free            = arm_smmu_domain_free,
1562         .attach_dev             = arm_smmu_attach_dev,
1563         .map                    = arm_smmu_map,
1564         .unmap                  = arm_smmu_unmap,
1565         .flush_iotlb_all        = arm_smmu_iotlb_sync,
1566         .iotlb_sync             = arm_smmu_iotlb_sync,
1567         .iova_to_phys           = arm_smmu_iova_to_phys,
1568         .add_device             = arm_smmu_add_device,
1569         .remove_device          = arm_smmu_remove_device,
1570         .device_group           = arm_smmu_device_group,
1571         .domain_get_attr        = arm_smmu_domain_get_attr,
1572         .domain_set_attr        = arm_smmu_domain_set_attr,
1573         .of_xlate               = arm_smmu_of_xlate,
1574         .get_resv_regions       = arm_smmu_get_resv_regions,
1575         .put_resv_regions       = arm_smmu_put_resv_regions,
1576         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1577 };
1578
1579 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1580 {
1581         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1582         int i;
1583         u32 reg, major;
1584
1585         /* clear global FSR */
1586         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1587         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1588
1589         /*
1590          * Reset stream mapping groups: Initial values mark all SMRn as
1591          * invalid and all S2CRn as bypass unless overridden.
1592          */
1593         for (i = 0; i < smmu->num_mapping_groups; ++i)
1594                 arm_smmu_write_sme(smmu, i);
1595
1596         if (smmu->model == ARM_MMU500) {
1597                 /*
1598                  * Before clearing ARM_MMU500_ACTLR_CPRE, we need to
1599                  * clear the CACHE_LOCK bit of ACR first; note that the
1600                  * CACHE_LOCK bit is only present from MMU-500 r2 onwards.
1601                  */
1602                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1603                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1604                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1605                 if (major >= 2)
1606                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1607                 /*
1608                  * Allow unmatched Stream IDs to allocate bypass
1609                  * TLB entries for reduced latency.
1610                  */
1611                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1612                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1613         }
1614
1615         /* Make sure all context banks are disabled and clear CB_FSR */
1616         for (i = 0; i < smmu->num_context_banks; ++i) {
1617                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1618
1619                 arm_smmu_write_context_bank(smmu, i);
1620                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1621                 /*
1622                  * Disable MMU-500's not-particularly-beneficial next-page
1623                  * prefetcher for the sake of errata #841119 and #826419.
1624                  */
1625                 if (smmu->model == ARM_MMU500) {
1626                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1627                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1628                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1629                 }
1630         }
1631
1632         /* Invalidate the TLB, just in case */
1633         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1634         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1635
1636         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1637
1638         /* Enable fault reporting */
1639         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1640
1641         /* Disable TLB broadcasting. */
1642         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1643
1644         /* Enable client access, handling unmatched streams as appropriate */
1645         reg &= ~sCR0_CLIENTPD;
1646         if (disable_bypass)
1647                 reg |= sCR0_USFCFG;
1648         else
1649                 reg &= ~sCR0_USFCFG;
1650
1651         /* Disable forced broadcasting */
1652         reg &= ~sCR0_FB;
1653
1654         /* Don't upgrade barriers */
1655         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1656
1657         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1658                 reg |= sCR0_VMID16EN;
1659
1660         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1661                 reg |= sCR0_EXIDENABLE;
1662
1663         /* Push the button */
1664         arm_smmu_tlb_sync_global(smmu);
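             /*
              * The final write is deliberately non-relaxed so that all of
              * the configuration above is observed before the SMMU is
              * (re-)enabled.
              */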
1665         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1666 }
1667
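     /*
      * Translate the 3-bit address-size encodings from ID2 (IAS/OAS/UBS)
      * into a width in bits; values of 5 and above, including reserved
      * encodings, are treated as 48 bits.
      */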
1668 static int arm_smmu_id_size_to_bits(int size)
1669 {
1670         switch (size) {
1671         case 0:
1672                 return 32;
1673         case 1:
1674                 return 36;
1675         case 2:
1676                 return 40;
1677         case 3:
1678                 return 42;
1679         case 4:
1680                 return 44;
1681         case 5:
1682         default:
1683                 return 48;
1684         }
1685 }
1686
1687 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1688 {
1689         unsigned long size;
1690         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1691         u32 id;
1692         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1693         int i;
1694
1695         dev_notice(smmu->dev, "probing hardware configuration...\n");
1696         dev_notice(smmu->dev, "SMMUv%d with:\n",
1697                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1698
1699         /* ID0 */
1700         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1701
1702         /* Restrict available stages based on module parameter */
1703         if (force_stage == 1)
1704                 id &= ~(ID0_S2TS | ID0_NTS);
1705         else if (force_stage == 2)
1706                 id &= ~(ID0_S1TS | ID0_NTS);
1707
1708         if (id & ID0_S1TS) {
1709                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1710                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1711         }
1712
1713         if (id & ID0_S2TS) {
1714                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1715                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1716         }
1717
1718         if (id & ID0_NTS) {
1719                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1720                 dev_notice(smmu->dev, "\tnested translation\n");
1721         }
1722
1723         if (!(smmu->features &
1724                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1725                 dev_err(smmu->dev, "\tno translation support!\n");
1726                 return -ENODEV;
1727         }
1728
1729         if ((id & ID0_S1TS) &&
1730                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1731                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1732                 dev_notice(smmu->dev, "\taddress translation ops\n");
1733         }
1734
1735         /*
1736          * In order for DMA API calls to work properly, we must defer to what
1737          * the FW says about coherency, regardless of what the hardware claims.
1738          * Fortunately, this also opens up a workaround for systems where the
1739          * ID register value has ended up configured incorrectly.
1740          */
1741         cttw_reg = !!(id & ID0_CTTW);
1742         if (cttw_fw || cttw_reg)
1743                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1744                            cttw_fw ? "" : "non-");
1745         if (cttw_fw != cttw_reg)
1746                 dev_notice(smmu->dev,
1747                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1748
1749         /* Max. number of entries we have for stream matching/indexing */
1750         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1751                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1752                 size = 1 << 16;
1753         } else {
1754                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1755         }
1756         smmu->streamid_mask = size - 1;
1757         if (id & ID0_SMS) {
1758                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1759                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1760                 if (size == 0) {
1761                         dev_err(smmu->dev,
1762                                 "stream-matching supported, but no SMRs present!\n");
1763                         return -ENODEV;
1764                 }
1765
1766                 /* Zero-initialised to mark as invalid */
1767                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1768                                           GFP_KERNEL);
1769                 if (!smmu->smrs)
1770                         return -ENOMEM;
1771
1772                 dev_notice(smmu->dev,
1773                            "\tstream matching with %lu register groups\n", size);
1774         }
1775         /* s2cr->type == 0 means translation, so initialise explicitly */
1776         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1777                                          GFP_KERNEL);
1778         if (!smmu->s2crs)
1779                 return -ENOMEM;
1780         for (i = 0; i < size; i++)
1781                 smmu->s2crs[i] = s2cr_init_val;
1782
1783         smmu->num_mapping_groups = size;
1784         mutex_init(&smmu->stream_map_mutex);
1785         spin_lock_init(&smmu->global_sync_lock);
1786
1787         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1788                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1789                 if (!(id & ID0_PTFS_NO_AARCH32S))
1790                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1791         }
1792
1793         /* ID1 */
1794         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1795         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1796
1797         /* Check for size mismatch of SMMU address space from mapped region */
1798         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1799         size <<= smmu->pgshift;
1800         if (smmu->cb_base != gr0_base + size)
1801                 dev_warn(smmu->dev,
1802                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1803                         size * 2, (smmu->cb_base - gr0_base) * 2);
1804
1805         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1806         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1807         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1808                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1809                 return -ENODEV;
1810         }
1811         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1812                    smmu->num_context_banks, smmu->num_s2_context_banks);
1813         /*
1814          * Cavium CN88xx erratum #27704.
1815          * Ensure ASID and VMID allocation is unique across all SMMUs in
1816          * the system.
1817          */
1818         if (smmu->model == CAVIUM_SMMUV2) {
1819                 smmu->cavium_id_base =
1820                         atomic_add_return(smmu->num_context_banks,
1821                                           &cavium_smmu_context_count);
1822                 smmu->cavium_id_base -= smmu->num_context_banks;
1823                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1824         }
1825         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1826                                  sizeof(*smmu->cbs), GFP_KERNEL);
1827         if (!smmu->cbs)
1828                 return -ENOMEM;
1829
1830         /* ID2 */
1831         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1832         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1833         smmu->ipa_size = size;
1834
1835         /* The output mask is also applied for bypass */
1836         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1837         smmu->pa_size = size;
1838
1839         if (id & ID2_VMID16)
1840                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1841
1842         /*
1843          * What the page table walker can address actually depends on which
1844          * descriptor format is in use, but since a) we don't know that yet,
1845          * and b) it can vary per context bank, this will have to do...
1846          */
1847         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1848                 dev_warn(smmu->dev,
1849                          "failed to set DMA mask for table walker\n");
1850
1851         if (smmu->version < ARM_SMMU_V2) {
1852                 smmu->va_size = smmu->ipa_size;
1853                 if (smmu->version == ARM_SMMU_V1_64K)
1854                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1855         } else {
1856                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1857                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1858                 if (id & ID2_PTFS_4K)
1859                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1860                 if (id & ID2_PTFS_16K)
1861                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1862                 if (id & ID2_PTFS_64K)
1863                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1864         }
1865
1866         /* Now we've corralled the various formats, what'll it do? */
1867         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1868                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1869         if (smmu->features &
1870             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1871                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1872         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1873                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1874         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1875                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1876
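             /*
              * arm_smmu_ops is shared by every SMMU instance, so its
              * pgsize_bitmap ends up as the union of the page sizes
              * supported by all instances probed so far.
              */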
1877         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1878                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1879         else
1880                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1881         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1882                    smmu->pgsize_bitmap);
1883
1884
1885         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1886                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1887                            smmu->va_size, smmu->ipa_size);
1888
1889         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1890                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1891                            smmu->ipa_size, smmu->pa_size);
1892
1893         return 0;
1894 }
1895
1896 struct arm_smmu_match_data {
1897         enum arm_smmu_arch_version version;
1898         enum arm_smmu_implementation model;
1899 };
1900
1901 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1902 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1903
1904 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1905 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1906 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1907 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1908 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1909
1910 static const struct of_device_id arm_smmu_of_match[] = {
1911         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1912         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1913         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1914         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1915         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1916         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1917         { },
1918 };
1919 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1920
1921 #ifdef CONFIG_ACPI
1922 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1923 {
1924         int ret = 0;
1925
1926         switch (model) {
1927         case ACPI_IORT_SMMU_V1:
1928         case ACPI_IORT_SMMU_CORELINK_MMU400:
1929                 smmu->version = ARM_SMMU_V1;
1930                 smmu->model = GENERIC_SMMU;
1931                 break;
1932         case ACPI_IORT_SMMU_CORELINK_MMU401:
1933                 smmu->version = ARM_SMMU_V1_64K;
1934                 smmu->model = GENERIC_SMMU;
1935                 break;
1936         case ACPI_IORT_SMMU_V2:
1937                 smmu->version = ARM_SMMU_V2;
1938                 smmu->model = GENERIC_SMMU;
1939                 break;
1940         case ACPI_IORT_SMMU_CORELINK_MMU500:
1941                 smmu->version = ARM_SMMU_V2;
1942                 smmu->model = ARM_MMU500;
1943                 break;
1944         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1945                 smmu->version = ARM_SMMU_V2;
1946                 smmu->model = CAVIUM_SMMUV2;
1947                 break;
1948         default:
1949                 ret = -ENODEV;
1950         }
1951
1952         return ret;
1953 }
1954
1955 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1956                                       struct arm_smmu_device *smmu)
1957 {
1958         struct device *dev = smmu->dev;
1959         struct acpi_iort_node *node =
1960                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1961         struct acpi_iort_smmu *iort_smmu;
1962         int ret;
1963
1964         /* Retrieve SMMU1/2 specific data */
1965         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1966
1967         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1968         if (ret < 0)
1969                 return ret;
1970
1971         /* Ignore the configuration access interrupt */
1972         smmu->num_global_irqs = 1;
1973
1974         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1975                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1976
1977         return 0;
1978 }
1979 #else
1980 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1981                                              struct arm_smmu_device *smmu)
1982 {
1983         return -ENODEV;
1984 }
1985 #endif
1986
1987 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1988                                     struct arm_smmu_device *smmu)
1989 {
1990         const struct arm_smmu_match_data *data;
1991         struct device *dev = &pdev->dev;
1992         bool legacy_binding;
1993
1994         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1995                                  &smmu->num_global_irqs)) {
1996                 dev_err(dev, "missing #global-interrupts property\n");
1997                 return -ENODEV;
1998         }
1999
2000         data = of_device_get_match_data(dev);
2001         smmu->version = data->version;
2002         smmu->model = data->model;
2003
2004         parse_driver_options(smmu);
2005
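             /*
              * The legacy "mmu-masters" binding and the generic "iommus"
              * binding imply different probe/attach flows, so once one
              * flavour is in use system-wide, an SMMU described with the
              * other flavour is refused.
              */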
2006         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2007         if (legacy_binding && !using_generic_binding) {
2008                 if (!using_legacy_binding)
2009                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2010                 using_legacy_binding = true;
2011         } else if (!legacy_binding && !using_legacy_binding) {
2012                 using_generic_binding = true;
2013         } else {
2014                 dev_err(dev, "not probing due to mismatched DT properties\n");
2015                 return -ENODEV;
2016         }
2017
2018         if (of_dma_is_coherent(dev->of_node))
2019                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2020
2021         return 0;
2022 }
2023
2024 static void arm_smmu_bus_init(void)
2025 {
2026         /* Oh, for a proper bus abstraction */
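             /*
              * bus_set_iommu() hooks arm_smmu_ops up to each bus type so
              * that devices on those buses, present and future, are routed
              * through arm_smmu_add_device().
              */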
2027         if (!iommu_present(&platform_bus_type))
2028                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2029 #ifdef CONFIG_ARM_AMBA
2030         if (!iommu_present(&amba_bustype))
2031                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2032 #endif
2033 #ifdef CONFIG_PCI
2034         if (!iommu_present(&pci_bus_type)) {
2035                 pci_request_acs();
2036                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2037         }
2038 #endif
2039 }
2040
2041 static int arm_smmu_device_probe(struct platform_device *pdev)
2042 {
2043         struct resource *res;
2044         resource_size_t ioaddr;
2045         struct arm_smmu_device *smmu;
2046         struct device *dev = &pdev->dev;
2047         int num_irqs, i, err;
2048
2049         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2050         if (!smmu) {
2051                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2052                 return -ENOMEM;
2053         }
2054         smmu->dev = dev;
2055
2056         if (dev->of_node)
2057                 err = arm_smmu_device_dt_probe(pdev, smmu);
2058         else
2059                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2060
2061         if (err)
2062                 return err;
2063
2064         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2065         ioaddr = res->start;
2066         smmu->base = devm_ioremap_resource(dev, res);
2067         if (IS_ERR(smmu->base))
2068                 return PTR_ERR(smmu->base);
2069         smmu->cb_base = smmu->base + resource_size(res) / 2;
2070
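             /*
              * Interrupt resources are laid out as num_global_irqs global
              * fault IRQs followed by the per-context-bank IRQs; count how
              * many of each were actually provided.
              */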
2071         num_irqs = 0;
2072         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2073                 num_irqs++;
2074                 if (num_irqs > smmu->num_global_irqs)
2075                         smmu->num_context_irqs++;
2076         }
2077
2078         if (!smmu->num_context_irqs) {
2079                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2080                         num_irqs, smmu->num_global_irqs + 1);
2081                 return -ENODEV;
2082         }
2083
2084         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2085                                   GFP_KERNEL);
2086         if (!smmu->irqs) {
2087                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2088                 return -ENOMEM;
2089         }
2090
2091         for (i = 0; i < num_irqs; ++i) {
2092                 int irq = platform_get_irq(pdev, i);
2093
2094                 if (irq < 0) {
2095                         dev_err(dev, "failed to get irq index %d\n", i);
2096                         return -ENODEV;
2097                 }
2098                 smmu->irqs[i] = irq;
2099         }
2100
2101         err = arm_smmu_device_cfg_probe(smmu);
2102         if (err)
2103                 return err;
2104
2105         if (smmu->version == ARM_SMMU_V2) {
2106                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2107                         dev_err(dev,
2108                               "found only %d context irq(s) but %d required\n",
2109                               smmu->num_context_irqs, smmu->num_context_banks);
2110                         return -ENODEV;
2111                 }
2112
2113                 /* Ignore superfluous interrupts */
2114                 smmu->num_context_irqs = smmu->num_context_banks;
2115         }
2116
2117         for (i = 0; i < smmu->num_global_irqs; ++i) {
2118                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2119                                        arm_smmu_global_fault,
2120                                        IRQF_SHARED,
2121                                        "arm-smmu global fault",
2122                                        smmu);
2123                 if (err) {
2124                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2125                                 i, smmu->irqs[i]);
2126                         return err;
2127                 }
2128         }
2129
2130         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2131                                      "smmu.%pa", &ioaddr);
2132         if (err) {
2133                 dev_err(dev, "Failed to register iommu in sysfs\n");
2134                 return err;
2135         }
2136
2137         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2138         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2139
2140         err = iommu_device_register(&smmu->iommu);
2141         if (err) {
2142                 dev_err(dev, "Failed to register iommu\n");
2143                 return err;
2144         }
2145
2146         platform_set_drvdata(pdev, smmu);
2147         arm_smmu_device_reset(smmu);
2148         arm_smmu_test_smr_masks(smmu);
2149
2150         /*
2151          * For ACPI and generic DT bindings, an SMMU will be probed before
2152          * any device which might need it, so we want the bus ops in place
2153          * ready to handle default domain setup as soon as any SMMU exists.
2154          */
2155         if (!using_legacy_binding)
2156                 arm_smmu_bus_init();
2157
2158         return 0;
2159 }
2160
2161 /*
2162  * With the legacy DT binding in play, though, we have no guarantees about
2163  * probe order, but then we're also not doing default domains, so we can
2164  * delay setting bus ops until we're sure every possible SMMU is ready,
2165  * and that way ensure that no add_device() calls get missed.
2166  */
2167 static int arm_smmu_legacy_bus_init(void)
2168 {
2169         if (using_legacy_binding)
2170                 arm_smmu_bus_init();
2171         return 0;
2172 }
2173 device_initcall_sync(arm_smmu_legacy_bus_init);
2174
2175 static int arm_smmu_device_remove(struct platform_device *pdev)
2176 {
2177         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2178
2179         if (!smmu)
2180                 return -ENODEV;
2181
2182         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2183                 dev_err(&pdev->dev, "removing device with active domains!\n");
2184
2185         /* Turn the thing off: sCR0.CLIENTPD disables translation for all clients */
2186         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2187         return 0;
2188 }
2189
2190 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2191 {
2192         arm_smmu_device_remove(pdev);
2193 }
2194
2195 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2196 {
2197         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2198
2199         arm_smmu_device_reset(smmu);
2200         return 0;
2201 }
2202
2203 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2204
2205 static struct platform_driver arm_smmu_driver = {
2206         .driver = {
2207                 .name           = "arm-smmu",
2208                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2209                 .pm             = &arm_smmu_pm_ops,
2210         },
2211         .probe  = arm_smmu_device_probe,
2212         .remove = arm_smmu_device_remove,
2213         .shutdown = arm_smmu_device_shutdown,
2214 };
2215 module_platform_driver(arm_smmu_driver);
2216
2217 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2218 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2219 MODULE_LICENSE("GPL v2");