diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index e05673bcd52bdad6adff75c48140baaeea1a79c1..f71758632f8dfd15979ed88d9fdf03a8ef071097 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -106,6 +106,7 @@ struct its_node {
        u64                     typer;
        u64                     cbaser_save;
        u32                     ctlr_save;
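+       /* Cached GITS_MPIDR, used to compute this ITS's CommonLPIAff */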
+       u32                     mpidr;
        struct list_head        its_device_list;
        u64                     flags;
        unsigned long           list_nr;
@@ -116,12 +117,22 @@ struct its_node {
 };
 
 #define is_v4(its)             (!!((its)->typer & GITS_TYPER_VLPIS))
+#define is_v4_1(its)           (!!((its)->typer & GITS_TYPER_VMAPP))
 #define device_ids(its)                (FIELD_GET(GITS_TYPER_DEVBITS, (its)->typer) + 1)
 
 #define ITS_ITT_ALIGN          SZ_256
 
 /* The maximum number of VPEID bits supported by VLPI commands */
-#define ITS_MAX_VPEID_BITS     (16)
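+/*
+ * With GICv4.1 (RVPEID and GICD_TYPER2.VIL set), GICD_TYPER2.VID
+ * encodes the vPE ID width minus one, hence the "1 +". Otherwise the
+ * GICv4.0 fixed 16-bit limit applies.
+ */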
+#define ITS_MAX_VPEID_BITS                                             \
+       ({                                                              \
+               int nvpeid = 16;                                        \
+               if (gic_rdists->has_rvpeid &&                           \
+                   gic_rdists->gicd_typer2 & GICD_TYPER2_VIL)          \
+                       nvpeid = 1 + (gic_rdists->gicd_typer2 &         \
+                                     GICD_TYPER2_VID);                 \
+                                                                       \
+               nvpeid;                                                 \
+       })
 #define ITS_MAX_VPEID          (1 << (ITS_MAX_VPEID_BITS))
 
 /* Convert page order to size in bytes */
@@ -216,11 +227,27 @@ static struct its_vlpi_map *dev_event_to_vlpi_map(struct its_device *its_dev,
        return &its_dev->event_map.vlpi_maps[event];
 }
 
-static struct its_collection *irq_to_col(struct irq_data *d)
+static struct its_vlpi_map *get_vlpi_map(struct irq_data *d)
+{
+       if (irqd_is_forwarded_to_vcpu(d)) {
+               struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+               u32 event = its_get_event_id(d);
+
+               return dev_event_to_vlpi_map(its_dev, event);
+       }
+
+       return NULL;
+}
+
+static int irq_to_cpuid(struct irq_data *d)
 {
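+       /*
+        * For a VLPI forwarded to a vCPU, target the CPU where the vPE
+        * is resident; otherwise use the LPI's collection mapping.
+        */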
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+       struct its_vlpi_map *map = get_vlpi_map(d);
 
-       return dev_event_to_col(its_dev, its_get_event_id(d));
+       if (map)
+               return map->vpe->col_idx;
+
+       return its_dev->event_map.col_map[its_get_event_id(d)];
 }
 
 static struct its_collection *valid_col(struct its_collection *col)
@@ -322,6 +349,10 @@ struct its_cmd_desc {
                        u16 seq_num;
                        u16 its_list;
                } its_vmovp_cmd;
+
+               struct {
+                       struct its_vpe *vpe;
+               } its_invdb_cmd;
        };
 };
 
@@ -438,6 +469,38 @@ static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size)
        its_mask_encode(&cmd->raw_cmd[3], vpt_size, 4, 0);
 }
 
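+/*
+ * GICv4.1 additions to the ITS command encodings: the vPE configuration
+ * table address and the Alloc/PTZ flags (VMAPP), the default doorbell
+ * (VMAPP and VMOVP), and the doorbell valid bit (VMOVP).
+ */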
+static void its_encode_vconf_addr(struct its_cmd_block *cmd, u64 vconf_pa)
+{
+       its_mask_encode(&cmd->raw_cmd[0], vconf_pa >> 16, 51, 16);
+}
+
+static void its_encode_alloc(struct its_cmd_block *cmd, bool alloc)
+{
+       its_mask_encode(&cmd->raw_cmd[0], alloc, 8, 8);
+}
+
+static void its_encode_ptz(struct its_cmd_block *cmd, bool ptz)
+{
+       its_mask_encode(&cmd->raw_cmd[0], ptz, 9, 9);
+}
+
+static void its_encode_vmapp_default_db(struct its_cmd_block *cmd,
+                                       u32 vpe_db_lpi)
+{
+       its_mask_encode(&cmd->raw_cmd[1], vpe_db_lpi, 31, 0);
+}
+
+static void its_encode_vmovp_default_db(struct its_cmd_block *cmd,
+                                       u32 vpe_db_lpi)
+{
+       its_mask_encode(&cmd->raw_cmd[3], vpe_db_lpi, 31, 0);
+}
+
+static void its_encode_db(struct its_cmd_block *cmd, bool db)
+{
+       its_mask_encode(&cmd->raw_cmd[2], db, 63, 63);
+}
+
 static inline void its_fixup_cmd(struct its_cmd_block *cmd)
 {
        /* Let's fixup BE commands */
@@ -621,19 +684,45 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
                                           struct its_cmd_block *cmd,
                                           struct its_cmd_desc *desc)
 {
-       unsigned long vpt_addr;
+       unsigned long vpt_addr, vconf_addr;
        u64 target;
-
-       vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page));
-       target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset;
+       bool alloc;
 
        its_encode_cmd(cmd, GITS_CMD_VMAPP);
        its_encode_vpeid(cmd, desc->its_vmapp_cmd.vpe->vpe_id);
        its_encode_valid(cmd, desc->its_vmapp_cmd.valid);
+
+       if (!desc->its_vmapp_cmd.valid) {
+               if (is_v4_1(its)) {
+                       alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
+                       its_encode_alloc(cmd, alloc);
+               }
+
+               goto out;
+       }
+
+       vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page));
+       target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset;
+
        its_encode_target(cmd, target);
        its_encode_vpt_addr(cmd, vpt_addr);
        its_encode_vpt_size(cmd, LPI_NRBITS - 1);
 
+       if (!is_v4_1(its))
+               goto out;
+
+       vconf_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->its_vm->vprop_page));
+
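+       /*
+        * vmapp_count tracks how many ITSs have this vPE mapped: Alloc
+        * is set on the first VMAPP (and on the last unmap above) so
+        * that the GIC knows when to (de)allocate per-vPE resources.
+        */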
+       alloc = !atomic_fetch_inc(&desc->its_vmapp_cmd.vpe->vmapp_count);
+
+       its_encode_alloc(cmd, alloc);
+
+       /* We can only signal PTZ when alloc==1. Why do we have two bits? */
+       its_encode_ptz(cmd, alloc);
+       its_encode_vconf_addr(cmd, vconf_addr);
+       its_encode_vmapp_default_db(cmd, desc->its_vmapp_cmd.vpe->vpe_db_lpi);
+
+out:
        its_fixup_cmd(cmd);
 
        return valid_vpe(its, desc->its_vmapp_cmd.vpe);
@@ -645,7 +734,7 @@ static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
 {
        u32 db;
 
-       if (desc->its_vmapti_cmd.db_enabled)
+       if (!is_v4_1(its) && desc->its_vmapti_cmd.db_enabled)
                db = desc->its_vmapti_cmd.vpe->vpe_db_lpi;
        else
                db = 1023;
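+       /*
+        * INTID 1023 means "no doorbell". GICv4.1 relies on the per-vPE
+        * default doorbell instead of per-VLPI ones.
+        */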
@@ -668,7 +757,7 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_node *its,
 {
        u32 db;
 
-       if (desc->its_vmovi_cmd.db_enabled)
+       if (!is_v4_1(its) && desc->its_vmovi_cmd.db_enabled)
                db = desc->its_vmovi_cmd.vpe->vpe_db_lpi;
        else
                db = 1023;
@@ -698,6 +787,11 @@ static struct its_vpe *its_build_vmovp_cmd(struct its_node *its,
        its_encode_vpeid(cmd, desc->its_vmovp_cmd.vpe->vpe_id);
        its_encode_target(cmd, target);
 
+       if (is_v4_1(its)) {
+               its_encode_db(cmd, true);
+               its_encode_vmovp_default_db(cmd, desc->its_vmovp_cmd.vpe->vpe_db_lpi);
+       }
+
        its_fixup_cmd(cmd);
 
        return valid_vpe(its, desc->its_vmovp_cmd.vpe);
@@ -757,6 +851,21 @@ static struct its_vpe *its_build_vclear_cmd(struct its_node *its,
        return valid_vpe(its, map->vpe);
 }
 
+static struct its_vpe *its_build_invdb_cmd(struct its_node *its,
+                                          struct its_cmd_block *cmd,
+                                          struct its_cmd_desc *desc)
+{
+       if (WARN_ON(!is_v4_1(its)))
+               return NULL;
+
+       its_encode_cmd(cmd, GITS_CMD_INVDB);
+       its_encode_vpeid(cmd, desc->its_invdb_cmd.vpe->vpe_id);
+
+       its_fixup_cmd(cmd);
+
+       return valid_vpe(its, desc->its_invdb_cmd.vpe);
+}
+
 static u64 its_cmd_ptr_to_offset(struct its_node *its,
                                 struct its_cmd_block *ptr)
 {
@@ -1165,20 +1274,17 @@ static void its_send_vclear(struct its_device *dev, u32 event_id)
        its_send_single_vcommand(dev->its, its_build_vclear_cmd, &desc);
 }
 
-/*
- * irqchip functions - assumes MSI, mostly.
- */
-static struct its_vlpi_map *get_vlpi_map(struct irq_data *d)
+static void its_send_invdb(struct its_node *its, struct its_vpe *vpe)
 {
-       struct its_device *its_dev = irq_data_get_irq_chip_data(d);
-       u32 event = its_get_event_id(d);
-
-       if (!irqd_is_forwarded_to_vcpu(d))
-               return NULL;
+       struct its_cmd_desc desc;
 
-       return dev_event_to_vlpi_map(its_dev, event);
+       desc.its_invdb_cmd.vpe = vpe;
+       its_send_single_vcommand(its, its_build_invdb_cmd, &desc);
 }
 
+/*
+ * irqchip functions - assumes MSI, mostly.
+ */
 static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 {
        struct its_vlpi_map *map = get_vlpi_map(d);
@@ -1221,13 +1327,25 @@ static void wait_for_syncr(void __iomem *rdbase)
 
 static void direct_lpi_inv(struct irq_data *d)
 {
-       struct its_collection *col;
+       struct its_vlpi_map *map = get_vlpi_map(d);
        void __iomem *rdbase;
+       u64 val;
+
+       if (map) {
+               struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+
+               WARN_ON(!is_v4_1(its_dev->its));
+
+               val  = GICR_INVLPIR_V;
+               val |= FIELD_PREP(GICR_INVLPIR_VPEID, map->vpe->vpe_id);
+               val |= FIELD_PREP(GICR_INVLPIR_INTID, map->vintid);
+       } else {
+               val = d->hwirq;
+       }
 
        /* Target the redistributor this LPI is currently routed to */
-       col = irq_to_col(d);
-       rdbase = per_cpu_ptr(gic_rdists->rdist, col->col_id)->rd_base;
-       gic_write_lpir(d->hwirq, rdbase + GICR_INVLPIR);
+       rdbase = per_cpu_ptr(gic_rdists->rdist, irq_to_cpuid(d))->rd_base;
+       gic_write_lpir(val, rdbase + GICR_INVLPIR);
 
        wait_for_syncr(rdbase);
 }
@@ -1237,7 +1355,8 @@ static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
 
        lpi_write_config(d, clr, set);
-       if (gic_rdists->has_direct_lpi && !irqd_is_forwarded_to_vcpu(d))
+       if (gic_rdists->has_direct_lpi &&
+           (is_v4_1(its_dev->its) || !irqd_is_forwarded_to_vcpu(d)))
                direct_lpi_inv(d);
        else if (!irqd_is_forwarded_to_vcpu(d))
                its_send_inv(its_dev, its_get_event_id(d));
@@ -1251,6 +1370,13 @@ static void its_vlpi_set_doorbell(struct irq_data *d, bool enable)
        u32 event = its_get_event_id(d);
        struct its_vlpi_map *map;
 
+       /*
+        * GICv4.1 does away with the per-LPI nonsense; nothing to do
+        * here.
+        */
+       if (is_v4_1(its_dev->its))
+               return;
+
        map = dev_event_to_vlpi_map(its_dev, event);
 
        if (map->db_enabled == enable)
@@ -2090,6 +2216,65 @@ static bool its_parse_indirect_baser(struct its_node *its,
        return indirect;
 }
 
+static u32 compute_common_aff(u64 val)
+{
+       u32 aff, clpiaff;
+
+       aff = FIELD_GET(GICR_TYPER_AFFINITY, val);
+       clpiaff = FIELD_GET(GICR_TYPER_COMMON_LPI_AFF, val);
+
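+       /*
+        * CommonLPIAff encodes how many leading affinity bytes must
+        * match for RDs and ITSs to share vPE tables: 0 means they
+        * all do, 3 means Aff3.Aff2.Aff1 must be identical.
+        */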
+       return aff & ~(GENMASK(31, 0) >> (clpiaff * 8));
+}
+
+static u32 compute_its_aff(struct its_node *its)
+{
+       u64 val;
+       u32 svpet;
+
+       /*
+        * Reencode the ITS SVPET and MPIDR as a GICR_TYPER, and compute
+        * the resulting affinity. We then use that to see if it matches
+        * our own affinity.
+        */
+       svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer);
+       val  = FIELD_PREP(GICR_TYPER_COMMON_LPI_AFF, svpet);
+       val |= FIELD_PREP(GICR_TYPER_AFFINITY, its->mpidr);
+       return compute_common_aff(val);
+}
+
+static struct its_node *find_sibling_its(struct its_node *cur_its)
+{
+       struct its_node *its;
+       u32 aff;
+
+       if (!FIELD_GET(GITS_TYPER_SVPET, cur_its->typer))
+               return NULL;
+
+       aff = compute_its_aff(cur_its);
+
+       list_for_each_entry(its, &its_nodes, entry) {
+               u64 baser;
+
+               if (!is_v4_1(its) || its == cur_its)
+                       continue;
+
+               if (!FIELD_GET(GITS_TYPER_SVPET, its->typer))
+                       continue;
+
+               if (aff != compute_its_aff(its))
+                       continue;
+
+               /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */
+               baser = its->tables[2].val;
+               if (!(baser & GITS_BASER_VALID))
+                       continue;
+
+               return its;
+       }
+
+       return NULL;
+}
+
 static void its_free_tables(struct its_node *its)
 {
        int i;
@@ -2132,6 +2317,17 @@ static int its_alloc_tables(struct its_node *its)
                        break;
 
                case GITS_BASER_TYPE_VCPU:
+                       if (is_v4_1(its)) {
+                               struct its_node *sibling;
+
+                               WARN_ON(i != 2);
+                               if ((sibling = find_sibling_its(its))) {
+                                       *baser = sibling->tables[2];
+                                       its_write_baser(its, baser, baser->val);
+                                       continue;
+                               }
+                       }
+
                        indirect = its_parse_indirect_baser(its, baser,
                                                            psz, &order,
                                                            ITS_MAX_VPEID_BITS);
@@ -2153,6 +2349,220 @@ static int its_alloc_tables(struct its_node *its)
        return 0;
 }
 
+static u64 inherit_vpe_l1_table_from_its(void)
+{
+       struct its_node *its;
+       u64 val;
+       u32 aff;
+
+       val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
+       aff = compute_common_aff(val);
+
+       list_for_each_entry(its, &its_nodes, entry) {
+               u64 baser, addr;
+
+               if (!is_v4_1(its))
+                       continue;
+
+               if (!FIELD_GET(GITS_TYPER_SVPET, its->typer))
+                       continue;
+
+               if (aff != compute_its_aff(its))
+                       continue;
+
+               /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */
+               baser = its->tables[2].val;
+               if (!(baser & GITS_BASER_VALID))
+                       continue;
+
+               /* We have a winner! */
+               val  = GICR_VPROPBASER_4_1_VALID;
+               if (baser & GITS_BASER_INDIRECT)
+                       val |= GICR_VPROPBASER_4_1_INDIRECT;
+               val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE,
+                                 FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser));
+               switch (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser)) {
+               case GIC_PAGE_SIZE_64K:
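+                       /* 64K pages encode PA[51:48] in bits [15:12] */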
+                       addr = GITS_BASER_ADDR_48_to_52(baser);
+                       break;
+               default:
+                       addr = baser & GENMASK_ULL(47, 12);
+                       break;
+               }
+               val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12);
+               val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
+                                 FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
+               val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
+                                 FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+               val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);
+
+               return val;
+       }
+
+       return 0;
+}
+
+static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask)
+{
+       u32 aff;
+       u64 val;
+       int cpu;
+
+       val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
+       aff = compute_common_aff(val);
+
+       for_each_possible_cpu(cpu) {
+               void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base;
+               u32 tmp;
+
+               if (!base || cpu == smp_processor_id())
+                       continue;
+
+               val = gic_read_typer(base + GICR_TYPER);
+               tmp = compute_common_aff(val);
+               if (tmp != aff)
+                       continue;
+
+               /*
+                * At this point, we have a victim. This particular CPU
+                * has already booted, and has an affinity that matches
+                * ours wrt CommonLPIAff. Let's use its own VPROPBASER.
+                * Make sure we don't write the Z bit in that case.
+                */
+               val = gits_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER);
+               val &= ~GICR_VPROPBASER_4_1_Z;
+
+               *mask = gic_data_rdist_cpu(cpu)->vpe_table_mask;
+
+               return val;
+       }
+
+       return 0;
+}
+
+static int allocate_vpe_l1_table(void)
+{
+       void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
+       u64 val, gpsz, npg, pa;
+       unsigned int psz = SZ_64K;
+       unsigned int np, epp, esz;
+       struct page *page;
+
+       if (!gic_rdists->has_rvpeid)
+               return 0;
+
+       /*
+        * If VPENDBASER.Valid is set, disable any previously programmed
+        * VPE by setting PendingLast while clearing Valid. This has the
+        * effect of making sure no doorbell will be generated and we can
+        * then safely clear VPROPBASER.Valid.
+        */
+       if (gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid)
+               gits_write_vpendbaser(GICR_VPENDBASER_PendingLast,
+                                     vlpi_base + GICR_VPENDBASER);
+
+       /*
+        * If we can inherit the configuration from another RD, let's do
+        * so. Otherwise, we have to go through the allocation process. We
+        * assume that all RDs have the exact same requirements, as
+        * nothing will work otherwise.
+        */
+       val = inherit_vpe_l1_table_from_rd(&gic_data_rdist()->vpe_table_mask);
+       if (val & GICR_VPROPBASER_4_1_VALID)
+               goto out;
+
+       gic_data_rdist()->vpe_table_mask = kzalloc(sizeof(cpumask_t), GFP_KERNEL);
+       if (!gic_data_rdist()->vpe_table_mask)
+               return -ENOMEM;
+
+       val = inherit_vpe_l1_table_from_its();
+       if (val & GICR_VPROPBASER_4_1_VALID)
+               goto out;
+
+       /* First probe the page size */
+       val = FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, GIC_PAGE_SIZE_64K);
+       gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
+       val = gits_read_vpropbaser(vlpi_base + GICR_VPROPBASER);
+       gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val);
+       esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val);
+
+       switch (gpsz) {
+       default:
+               gpsz = GIC_PAGE_SIZE_4K;
+               /* fall through */
+       case GIC_PAGE_SIZE_4K:
+               psz = SZ_4K;
+               break;
+       case GIC_PAGE_SIZE_16K:
+               psz = SZ_16K;
+               break;
+       case GIC_PAGE_SIZE_64K:
+               psz = SZ_64K;
+               break;
+       }
+
+       /*
+        * Start populating the register from scratch, including RO fields
+        * (which we want to print in debug cases...)
+        */
+       val = 0;
+       val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, gpsz);
+       val |= FIELD_PREP(GICR_VPROPBASER_4_1_ENTRY_SIZE, esz);
+
+       /* How many entries per GIC page? */
+       esz++;
+       epp = psz / (esz * SZ_8);
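+       /*
+        * ENTRY_SIZE is expressed in 64bit units, minus one, hence the
+        * increment above and the 8-byte multiplier.
+        */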
+
+       /*
+        * If we need more than just a single L1 page, flag the table
+        * as indirect and compute the number of required L1 pages.
+        */
+       if (epp < ITS_MAX_VPEID) {
+               int nl2;
+
+               val |= GICR_VPROPBASER_4_1_INDIRECT;
+
+               /* Number of L2 pages required to cover the VPEID space */
+               nl2 = DIV_ROUND_UP(ITS_MAX_VPEID, epp);
+
+               /* Number of L1 pages to point to the L2 pages */
+               npg = DIV_ROUND_UP(nl2 * SZ_8, psz);
+       } else {
+               npg = 1;
+       }
+
+       val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg);
+
+       /* Right, that's the number of CPU pages we need for L1 */
+       np = DIV_ROUND_UP(npg * psz, PAGE_SIZE);
+
+       pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n",
+                np, npg, psz, epp, esz);
+       page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(np * PAGE_SIZE));
+       if (!page)
+               return -ENOMEM;
+
+       gic_data_rdist()->vpe_l1_page = page;
+       pa = virt_to_phys(page_address(page));
+       WARN_ON(!IS_ALIGNED(pa, psz));
+
+       val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12);
+       val |= GICR_VPROPBASER_RaWb;
+       val |= GICR_VPROPBASER_InnerShareable;
+       val |= GICR_VPROPBASER_4_1_Z;
+       val |= GICR_VPROPBASER_4_1_VALID;
+
+out:
+       gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
+       cpumask_set_cpu(smp_processor_id(), gic_data_rdist()->vpe_table_mask);
+
+       pr_debug("CPU%d: VPROPBASER = %llx %*pbl\n",
+                smp_processor_id(), val,
+                cpumask_pr_args(gic_data_rdist()->vpe_table_mask));
+
+       return 0;
+}
+
 static int its_alloc_collections(struct its_node *its)
 {
        int i;
@@ -2244,7 +2654,7 @@ static int __init allocate_lpi_tables(void)
        return 0;
 }
 
-static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
+static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
 {
        u32 count = 1000000;    /* 1s! */
        bool clean;
@@ -2252,6 +2662,8 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
 
        val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
        val &= ~GICR_VPENDBASER_Valid;
+       val &= ~clr;
+       val |= set;
        gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
 
        do {
@@ -2264,6 +2676,11 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
                }
        } while (!clean && count);
 
+       if (unlikely(val & GICR_VPENDBASER_Dirty)) {
+               pr_err_ratelimited("ITS virtual pending table not cleaning\n");
+               val |= GICR_VPENDBASER_PendingLast;
+       }
+
        return val;
 }
 
@@ -2352,7 +2769,7 @@ static void its_cpu_init_lpis(void)
        val |= GICR_CTLR_ENABLE_LPIS;
        writel_relaxed(val, rbase + GICR_CTLR);
 
-       if (gic_rdists->has_vlpis) {
+       if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) {
                void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
 
                /*
@@ -2372,10 +2789,20 @@ static void its_cpu_init_lpis(void)
                 * ancient programming gets left in and has possibility of
                 * corrupting memory.
                 */
-               val = its_clear_vpend_valid(vlpi_base);
+               val = its_clear_vpend_valid(vlpi_base, 0, 0);
                WARN_ON(val & GICR_VPENDBASER_Dirty);
        }
 
+       if (allocate_vpe_l1_table()) {
+               /*
+                * If the allocation has failed, we're in massive trouble.
+                * Disable direct injection, and pray that no VM was
+                * already running...
+                */
+               gic_rdists->has_rvpeid = false;
+               gic_rdists->has_vlpis = false;
+       }
+
        /* Make sure the GIC has seen the above */
        dsb(sy);
 out:
@@ -2859,7 +3286,7 @@ static const struct irq_domain_ops its_domain_ops = {
 /*
  * This is insane.
  *
- * If a GICv4 doesn't implement Direct LPIs (which is extremely
+ * If a GICv4.0 doesn't implement Direct LPIs (which is extremely
  * likely), the only way to perform an invalidate is to use a fake
  * device to issue an INV command, implying that the LPI has first
  * been mapped to some event on that device. Since this is not exactly
@@ -2867,9 +3294,20 @@ static const struct irq_domain_ops its_domain_ops = {
  * only issue an UNMAP if we're short on available slots.
  *
  * Broken by design(tm).
+ *
+ * GICv4.1, on the other hand, mandates that we're able to invalidate
+ * by writing to an MMIO register. It doesn't implement the whole of
+ * DirectLPI, but that's good enough. And most of the time, we don't
+ * even have to invalidate anything, as the redistributor can be told
+ * whether to generate a doorbell or not (we thus leave it enabled,
+ * always).
  */
 static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
 {
+       /* GICv4.1 doesn't use a proxy, so nothing to do here */
+       if (gic_rdists->has_rvpeid)
+               return;
+
        /* Already unmapped? */
        if (vpe->vpe_proxy_event == -1)
                return;
@@ -2892,6 +3330,10 @@ static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
 
 static void its_vpe_db_proxy_unmap(struct its_vpe *vpe)
 {
+       /* GICv4.1 doesn't use a proxy, so nothing to do here */
+       if (gic_rdists->has_rvpeid)
+               return;
+
        if (!gic_rdists->has_direct_lpi) {
                unsigned long flags;
 
@@ -2903,6 +3345,10 @@ static void its_vpe_db_proxy_unmap(struct its_vpe *vpe)
 
 static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe)
 {
+       /* GICv4.1 doesn't use a proxy, so nothing to do here */
+       if (gic_rdists->has_rvpeid)
+               return;
+
        /* Already mapped? */
        if (vpe->vpe_proxy_event != -1)
                return;
@@ -2925,6 +3371,10 @@ static void its_vpe_db_proxy_move(struct its_vpe *vpe, int from, int to)
        unsigned long flags;
        struct its_collection *target_col;
 
+       /* GICv4.1 doesn't use a proxy, so nothing to do here */
+       if (gic_rdists->has_rvpeid)
+               return;
+
        if (gic_rdists->has_direct_lpi) {
                void __iomem *rdbase;
 
@@ -2951,7 +3401,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
                                bool force)
 {
        struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
-       int cpu = cpumask_first(mask_val);
+       int from, cpu = cpumask_first(mask_val);
 
        /*
         * Changing affinity is mega expensive, so let's be as lazy as
@@ -2959,14 +3409,24 @@ static int its_vpe_set_affinity(struct irq_data *d,
         * into the proxy device, we need to move the doorbell
         * interrupt to its new location.
         */
-       if (vpe->col_idx != cpu) {
-               int from = vpe->col_idx;
+       if (vpe->col_idx == cpu)
+               goto out;
 
-               vpe->col_idx = cpu;
-               its_send_vmovp(vpe);
-               its_vpe_db_proxy_move(vpe, from, cpu);
-       }
+       from = vpe->col_idx;
+       vpe->col_idx = cpu;
 
+       /*
+        * GICv4.1 allows us to skip VMOVP if moving to a CPU whose RD
+        * is sharing its VPE table with the current one.
+        */
+       if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
+           cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
+               goto out;
+
+       its_send_vmovp(vpe);
+       its_vpe_db_proxy_move(vpe, from, cpu);
+
+out:
        irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
        return IRQ_SET_MASK_OK_DONE;
@@ -3009,16 +3469,10 @@ static void its_vpe_deschedule(struct its_vpe *vpe)
        void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
        u64 val;
 
-       val = its_clear_vpend_valid(vlpi_base);
+       val = its_clear_vpend_valid(vlpi_base, 0, 0);
 
-       if (unlikely(val & GICR_VPENDBASER_Dirty)) {
-               pr_err_ratelimited("ITS virtual pending table not cleaning\n");
-               vpe->idai = false;
-               vpe->pending_last = true;
-       } else {
-               vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
-               vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
-       }
+       vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
+       vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
 }
 
 static void its_vpe_invall(struct its_vpe *vpe)
@@ -3151,6 +3605,139 @@ static struct irq_chip its_vpe_irq_chip = {
        .irq_set_vcpu_affinity  = its_vpe_set_vcpu_affinity,
 };
 
+static struct its_node *find_4_1_its(void)
+{
+       static struct its_node *its = NULL;
+
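+       /* The lookup is cached: any GICv4.1 ITS will do, as INVDB is broadcast */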
+       if (!its) {
+               list_for_each_entry(its, &its_nodes, entry) {
+                       if (is_v4_1(its))
+                               return its;
+               }
+
+               /* Oops? */
+               its = NULL;
+       }
+
+       return its;
+}
+
+static void its_vpe_4_1_send_inv(struct irq_data *d)
+{
+       struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+       struct its_node *its;
+
+       /*
+        * GICv4.1 wants doorbells to be invalidated using the
+        * INVDB command in order to be broadcast to all RDs. Send
+        * it to the first valid ITS, and let the HW do its magic.
+        */
+       its = find_4_1_its();
+       if (its)
+               its_send_invdb(its, vpe);
+}
+
+static void its_vpe_4_1_mask_irq(struct irq_data *d)
+{
+       lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0);
+       its_vpe_4_1_send_inv(d);
+}
+
+static void its_vpe_4_1_unmask_irq(struct irq_data *d)
+{
+       lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED);
+       its_vpe_4_1_send_inv(d);
+}
+
+static void its_vpe_4_1_schedule(struct its_vpe *vpe,
+                                struct its_cmd_info *info)
+{
+       void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
+       u64 val = 0;
+
+       /* Schedule the VPE */
+       val |= GICR_VPENDBASER_Valid;
+       val |= info->g0en ? GICR_VPENDBASER_4_1_VGRP0EN : 0;
+       val |= info->g1en ? GICR_VPENDBASER_4_1_VGRP1EN : 0;
+       val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
+
+       gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
+}
+
+static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
+                                  struct its_cmd_info *info)
+{
+       void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
+       u64 val;
+
+       if (info->req_db) {
+               /*
+                * vPE is going to block: make the vPE non-resident with
+                * PendingLast clear and DB set. The GIC guarantees that if
+                * we read-back PendingLast clear, then a doorbell will be
+                * delivered when an interrupt comes.
+                */
+               val = its_clear_vpend_valid(vlpi_base,
+                                           GICR_VPENDBASER_PendingLast,
+                                           GICR_VPENDBASER_4_1_DB);
+               vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
+       } else {
+               /*
+                * We're not blocking, so just make the vPE non-resident
+                * with PendingLast set, indicating that we'll be back.
+                */
+               val = its_clear_vpend_valid(vlpi_base,
+                                           0,
+                                           GICR_VPENDBASER_PendingLast);
+               vpe->pending_last = true;
+       }
+}
+
+static void its_vpe_4_1_invall(struct its_vpe *vpe)
+{
+       void __iomem *rdbase;
+       u64 val;
+
+       val  = GICR_INVALLR_V;
+       val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id);
+
+       /* Target the redistributor this vPE is currently known on */
+       rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+       gic_write_lpir(val, rdbase + GICR_INVALLR);
+}
+
+static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
+{
+       struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+       struct its_cmd_info *info = vcpu_info;
+
+       switch (info->cmd_type) {
+       case SCHEDULE_VPE:
+               its_vpe_4_1_schedule(vpe, info);
+               return 0;
+
+       case DESCHEDULE_VPE:
+               its_vpe_4_1_deschedule(vpe, info);
+               return 0;
+
+       case INVALL_VPE:
+               its_vpe_4_1_invall(vpe);
+               return 0;
+
+       default:
+               return -EINVAL;
+       }
+}
+
+static struct irq_chip its_vpe_4_1_irq_chip = {
+       .name                   = "GICv4.1-vpe",
+       .irq_mask               = its_vpe_4_1_mask_irq,
+       .irq_unmask             = its_vpe_4_1_unmask_irq,
+       .irq_eoi                = irq_chip_eoi_parent,
+       .irq_set_affinity       = its_vpe_set_affinity,
+       .irq_set_vcpu_affinity  = its_vpe_4_1_set_vcpu_affinity,
+};
+
 static int its_vpe_id_alloc(void)
 {
        return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL);
@@ -3186,7 +3773,10 @@ static int its_vpe_init(struct its_vpe *vpe)
 
        vpe->vpe_id = vpe_id;
        vpe->vpt_page = vpt_page;
-       vpe->vpe_proxy_event = -1;
+       if (gic_rdists->has_rvpeid)
+               atomic_set(&vpe->vmapp_count, 0);
+       else
+               vpe->vpe_proxy_event = -1;
 
        return 0;
 }
@@ -3228,6 +3818,7 @@ static void its_vpe_irq_domain_free(struct irq_domain *domain,
 static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
                                    unsigned int nr_irqs, void *args)
 {
+       struct irq_chip *irqchip = &its_vpe_irq_chip;
        struct its_vm *vm = args;
        unsigned long *bitmap;
        struct page *vprop_page;
@@ -3255,6 +3846,9 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
        vm->nr_db_lpis = nr_ids;
        vm->vprop_page = vprop_page;
 
+       if (gic_rdists->has_rvpeid)
+               irqchip = &its_vpe_4_1_irq_chip;
+
        for (i = 0; i < nr_irqs; i++) {
                vm->vpes[i]->vpe_db_lpi = base + i;
                err = its_vpe_init(vm->vpes[i]);
@@ -3265,7 +3859,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
                if (err)
                        break;
                irq_domain_set_hwirq_and_chip(domain, virq + i, i,
-                                             &its_vpe_irq_chip, vm->vpes[i]);
+                                             irqchip, vm->vpes[i]);
                set_bit(i, bitmap);
        }
 
@@ -3778,6 +4372,14 @@ static int __init its_probe_one(struct resource *res,
                } else {
                        pr_info("ITS@%pa: Single VMOVP capable\n", &res->start);
                }
+
+               if (is_v4_1(its)) {
+                       u32 svpet = FIELD_GET(GITS_TYPER_SVPET, typer);
+                       its->mpidr = readl_relaxed(its_base + GITS_MPIDR);
+
+                       pr_info("ITS@%pa: Using GICv4.1 mode %08x %08x\n",
+                               &res->start, its->mpidr, svpet);
+               }
        }
 
        its->numa_node = numa_node;
@@ -4138,6 +4740,8 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
        bool has_v4 = false;
        int err;
 
+       gic_rdists = rdists;
+
        its_parent = parent_domain;
        of_node = to_of_node(handle);
        if (of_node)
@@ -4150,8 +4754,6 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
                return -ENXIO;
        }
 
-       gic_rdists = rdists;
-
        err = allocate_lpi_tables();
        if (err)
                return err;