]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
iommu/arm-smmu-v3: Use CMD_SYNC completion MSI
authorRobin Murphy <robin.murphy@arm.com>
Wed, 18 Oct 2017 14:04:26 +0000 (15:04 +0100)
committerWill Deacon <will.deacon@arm.com>
Fri, 20 Oct 2017 15:55:08 +0000 (16:55 +0100)
As an IRQ, the CMD_SYNC interrupt is not particularly useful, not least
because we often need to wait for sync completion within someone else's
IRQ handler anyway. However, when the SMMU is both coherent and supports
MSIs, we can have a lot more fun by not using it as an interrupt at all.
Following the example suggested in the architecture and using a write
targeting normal memory, we can let callers wait on a status variable
outside the lock instead of having to stall the entire queue or even
touch MMIO registers. Since multiple sync commands are guaranteed to
complete in order, a simple incrementing sequence count is all we need
to unambiguously support any realistic number of overlapping waiters.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
drivers/iommu/arm-smmu-v3.c

index aa3bd39682905f3e565e2401bbc3ec57ab1c249c..ceb8f9ef4badf537cb7c30634c7e25a4c6f30ccf 100644 (file)
 
 #define CMDQ_SYNC_0_CS_SHIFT           12
 #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ             (1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT          22
+#define CMDQ_SYNC_0_MSH_ISH            (3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT      24
+#define CMDQ_SYNC_0_MSIATTR_OIWB       (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT      32
+#define CMDQ_SYNC_0_MSIDATA_MASK       0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT      0
+#define CMDQ_SYNC_1_MSIADDR_MASK       0xffffffffffffcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS                        4
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US       100
 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */
+#define ARM_SMMU_SYNC_TIMEOUT_US       1000000 /* 1s! */
 
 #define MSI_IOVA_BASE                  0x8000000
 #define MSI_IOVA_LENGTH                        0x100000
@@ -496,6 +506,10 @@ struct arm_smmu_cmdq_ent {
                } pri;
 
                #define CMDQ_OP_CMD_SYNC        0x46
+               struct {
+                       u32                     msidata;
+                       u64                     msiaddr;
+               } sync;
        };
 };
 
@@ -609,6 +623,7 @@ struct arm_smmu_device {
 
        int                             gerr_irq;
        int                             combined_irq;
+       atomic_t                        sync_nr;
 
        unsigned long                   ias; /* IPA */
        unsigned long                   oas; /* PA */
@@ -627,6 +642,8 @@ struct arm_smmu_device {
 
        struct arm_smmu_strtab_cfg      strtab_cfg;
 
+       u32                             sync_count;
+
        /* IOMMU core code handle */
        struct iommu_device             iommu;
 };
@@ -871,7 +888,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
                }
                break;
        case CMDQ_OP_CMD_SYNC:
-               cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+               if (ent->sync.msiaddr)
+                       cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+               else
+                       cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+               cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+               cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+               cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
                break;
        default:
                return -ENOENT;
@@ -957,21 +980,44 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
        spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
+       u32 val = smp_cond_load_acquire(&smmu->sync_count,
+                                       (int)(VAL - sync_idx) >= 0 ||
+                                       !ktime_before(ktime_get(), timeout));
+
+       return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
        u64 cmd[CMDQ_ENT_DWORDS];
        unsigned long flags;
        bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+       bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+                  (smmu->features & ARM_SMMU_FEAT_COHERENCY);
        struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
        int ret;
 
+       if (msi) {
+               ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr);
+               ent.sync.msiaddr = virt_to_phys(&smmu->sync_count);
+       }
        arm_smmu_cmdq_build_cmd(cmd, &ent);
 
        spin_lock_irqsave(&smmu->cmdq.lock, flags);
        arm_smmu_cmdq_insert_cmd(smmu, cmd);
-       ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
+       if (!msi)
+               ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
        spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 
+       if (msi)
+               ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
        if (ret)
                dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 }
@@ -2159,6 +2205,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
        int ret;
 
+       atomic_set(&smmu->sync_nr, 0);
        ret = arm_smmu_init_queues(smmu);
        if (ret)
                return ret;