]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
powerpc/powernv: Shift VF resource with an offset
author Wei Yang <weiyang@linux.vnet.ibm.com>
Wed, 25 Mar 2015 08:23:57 +0000 (16:23 +0800)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tue, 31 Mar 2015 02:02:38 +0000 (13:02 +1100)
On the PowerNV platform, a resource's position in the M64 BAR implies the
PE# the resource belongs to. In some cases, a resource must be adjusted so
that it is located at the correct position in the M64 BAR.

This patch adds pnv_pci_vf_resource_shift() to shift the 'real' PF IOV BAR
address according to an offset.

Note:

    After doing so, there will be a "hole" in /proc/iomem when the offset
    is a positive value. It looks as if the device returned some MMIO space
    to the system, but in fact no one can use that space.

[bhelgaas: rework loops, rework overlap check, index resource[]
conventionally, remove pci_regs.h include, squashed with next patch]
Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/kernel/pci_dn.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/powernv/pci.h

index 7b8ebc5929ffe8ae8ebfe9393e59b2ba4efdc78d..8716db48e9461a8bdd5924cf3e12aa11d243f3af 100644 (file)
@@ -180,6 +180,10 @@ struct pci_dn {
        int     pe_number;
 #ifdef CONFIG_PCI_IOV
        u16     vfs_expanded;           /* number of VFs IOV BAR expanded */
+       u16     num_vfs;                /* number of VFs enabled*/
+       int     offset;                 /* PE# for the first VF PE */
+#define IODA_INVALID_M64        (-1)
+       int     m64_wins[PCI_SRIOV_NUM_BARS];
 #endif /* CONFIG_PCI_IOV */
 #endif
        struct list_head child_list;
index e5f1d78ef7cf346e27b631c7b14f05b08e52f13e..b3b4df91b792cc59e533c7465b7543da37756de6 100644 (file)
@@ -217,6 +217,19 @@ void remove_dev_pci_data(struct pci_dev *pdev)
        struct pci_dn *pdn, *tmp;
        int i;
 
+       /*
+        * VF and VF PE are created/released dynamically, so we need to
+        * bind/unbind them.  Otherwise the VF and VF PE would be mismatched
+        * when re-enabling SR-IOV.
+        */
+       if (pdev->is_virtfn) {
+               pdn = pci_get_pdn(pdev);
+#ifdef CONFIG_PPC_POWERNV
+               pdn->pe_number = IODA_INVALID_PE;
+#endif
+               return;
+       }
+
        /* Only support IOV PF for now */
        if (!pdev->is_physfn)
                return;
index 217eaad23cdef8f865265b3be4ac09bb923186e3..5187d164cfe1331d46101154bedfce215ebe8a07 100644 (file)
@@ -44,6 +44,9 @@
 #include "powernv.h"
 #include "pci.h"
 
+/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
+#define TCE32_TABLE_SIZE       ((0x10000000 / 0x1000) * 8)
+
 static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
                            const char *fmt, ...)
 {
@@ -56,11 +59,18 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
        vaf.fmt = fmt;
        vaf.va = &args;
 
-       if (pe->pdev)
+       if (pe->flags & PNV_IODA_PE_DEV)
                strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
-       else
+       else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
                sprintf(pfix, "%04x:%02x     ",
                        pci_domain_nr(pe->pbus), pe->pbus->number);
+#ifdef CONFIG_PCI_IOV
+       else if (pe->flags & PNV_IODA_PE_VF)
+               sprintf(pfix, "%04x:%02x:%2x.%d",
+                       pci_domain_nr(pe->parent_dev->bus),
+                       (pe->rid & 0xff00) >> 8,
+                       PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
+#endif /* CONFIG_PCI_IOV*/
 
        printk("%spci %s: [PE# %.3d] %pV",
               level, pfix, pe->pe_number, &vaf);
@@ -591,7 +601,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
                              bool is_add)
 {
        struct pnv_ioda_pe *slave;
-       struct pci_dev *pdev;
+       struct pci_dev *pdev = NULL;
        int ret;
 
        /*
@@ -630,8 +640,12 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
 
        if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
                pdev = pe->pbus->self;
-       else
+       else if (pe->flags & PNV_IODA_PE_DEV)
                pdev = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+       else if (pe->flags & PNV_IODA_PE_VF)
+               pdev = pe->parent_dev->bus->self;
+#endif /* CONFIG_PCI_IOV */
        while (pdev) {
                struct pci_dn *pdn = pci_get_pdn(pdev);
                struct pnv_ioda_pe *parent;
@@ -649,6 +663,87 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
        return 0;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+       struct pci_dev *parent;
+       uint8_t bcomp, dcomp, fcomp;
+       int64_t rc;
+       long rid_end, rid;
+
+       /* Currently, we just deconfigure VF PE. Bus PE will always there.*/
+       if (pe->pbus) {
+               int count;
+
+               dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+               fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+               parent = pe->pbus->self;
+               if (pe->flags & PNV_IODA_PE_BUS_ALL)
+                       count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+               else
+                       count = 1;
+
+               switch(count) {
+               case  1: bcomp = OpalPciBusAll;         break;
+               case  2: bcomp = OpalPciBus7Bits;       break;
+               case  4: bcomp = OpalPciBus6Bits;       break;
+               case  8: bcomp = OpalPciBus5Bits;       break;
+               case 16: bcomp = OpalPciBus4Bits;       break;
+               case 32: bcomp = OpalPciBus3Bits;       break;
+               default:
+                       dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+                               count);
+                       /* Do an exact match only */
+                       bcomp = OpalPciBusAll;
+               }
+               rid_end = pe->rid + (count << 8);
+       } else {
+               if (pe->flags & PNV_IODA_PE_VF)
+                       parent = pe->parent_dev;
+               else
+                       parent = pe->pdev->bus->self;
+               bcomp = OpalPciBusAll;
+               dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+               fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+               rid_end = pe->rid + 1;
+       }
+
+       /* Clear the reverse map */
+       for (rid = pe->rid; rid < rid_end; rid++)
+               phb->ioda.pe_rmap[rid] = 0;
+
+       /* Release from all parents PELT-V */
+       while (parent) {
+               struct pci_dn *pdn = pci_get_pdn(parent);
+               if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+                       rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+                                               pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+                       /* XXX What to do in case of error ? */
+               }
+               parent = parent->bus->self;
+       }
+
+       opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
+                                 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+       /* Disassociate PE in PELT */
+       rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+                               pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+       if (rc)
+               pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
+       rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+                            bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
+       if (rc)
+               pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+
+       pe->pbus = NULL;
+       pe->pdev = NULL;
+       pe->parent_dev = NULL;
+
+       return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
        struct pci_dev *parent;
@@ -675,15 +770,19 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
                case 16: bcomp = OpalPciBus4Bits;       break;
                case 32: bcomp = OpalPciBus3Bits;       break;
                default:
-                       pr_err("%s: Number of subordinate busses %d"
-                              " unsupported\n",
-                              pci_name(pe->pbus->self), count);
+                       dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+                               count);
                        /* Do an exact match only */
                        bcomp = OpalPciBusAll;
                }
                rid_end = pe->rid + (count << 8);
        } else {
-               parent = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+               if (pe->flags & PNV_IODA_PE_VF)
+                       parent = pe->parent_dev;
+               else
+#endif /* CONFIG_PCI_IOV */
+                       parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
                fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
@@ -774,6 +873,78 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
        return 10;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+       struct pci_dn *pdn = pci_get_pdn(dev);
+       int i;
+       struct resource *res, res2;
+       resource_size_t size;
+       u16 num_vfs;
+
+       if (!dev->is_physfn)
+               return -EINVAL;
+
+       /*
+        * "offset" is in VFs.  The M64 windows are sized so that when they
+        * are segmented, each segment is the same size as the IOV BAR.
+        * Each segment is in a separate PE, and the high order bits of the
+        * address are the PE number.  Therefore, each VF's BAR is in a
+        * separate PE, and changing the IOV BAR start address changes the
+        * range of PEs the VFs are in.
+        */
+       num_vfs = pdn->num_vfs;
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = &dev->resource[i + PCI_IOV_RESOURCES];
+               if (!res->flags || !res->parent)
+                       continue;
+
+               if (!pnv_pci_is_mem_pref_64(res->flags))
+                       continue;
+
+               /*
+                * The actual IOV BAR range is determined by the start address
+                * and the actual size for num_vfs VFs BAR.  This check is to
+                * make sure that after shifting, the range will not overlap
+                * with another device.
+                */
+               size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+               res2.flags = res->flags;
+               res2.start = res->start + (size * offset);
+               res2.end = res2.start + (size * num_vfs) - 1;
+
+               if (res2.end > res->end) {
+                       dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+                               i, &res2, res, num_vfs, offset);
+                       return -EBUSY;
+               }
+       }
+
+       /*
+        * After doing so, there would be a "hole" in the /proc/iomem when
+        * offset is a positive value. It looks like the device return some
+        * mmio back to the system, which actually no one could use it.
+        */
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = &dev->resource[i + PCI_IOV_RESOURCES];
+               if (!res->flags || !res->parent)
+                       continue;
+
+               if (!pnv_pci_is_mem_pref_64(res->flags))
+                       continue;
+
+               size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+               res2 = *res;
+               res->start += size * offset;
+
+               dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n",
+                        i, &res2, res, num_vfs, offset);
+               pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+       }
+       return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 #if 0
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
@@ -979,8 +1150,316 @@ static void pnv_pci_ioda_setup_PEs(void)
 }
 
 #ifdef CONFIG_PCI_IOV
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct pci_dn         *pdn;
+       int                    i;
+
+       bus = pdev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+       pdn = pci_get_pdn(pdev);
+
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               if (pdn->m64_wins[i] == IODA_INVALID_M64)
+                       continue;
+               opal_pci_phb_mmio_enable(phb->opal_id,
+                               OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 0);
+               clear_bit(pdn->m64_wins[i], &phb->ioda.m64_bar_alloc);
+               pdn->m64_wins[i] = IODA_INVALID_M64;
+       }
+
+       return 0;
+}
+
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct pci_dn         *pdn;
+       unsigned int           win;
+       struct resource       *res;
+       int                    i;
+       int64_t                rc;
+
+       bus = pdev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+       pdn = pci_get_pdn(pdev);
+
+       /* Initialize the m64_wins to IODA_INVALID_M64 */
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+               pdn->m64_wins[i] = IODA_INVALID_M64;
+
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = &pdev->resource[i + PCI_IOV_RESOURCES];
+               if (!res->flags || !res->parent)
+                       continue;
+
+               if (!pnv_pci_is_mem_pref_64(res->flags))
+                       continue;
+
+               do {
+                       win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+                                       phb->ioda.m64_bar_idx + 1, 0);
+
+                       if (win >= phb->ioda.m64_bar_idx + 1)
+                               goto m64_failed;
+               } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+               pdn->m64_wins[i] = win;
+
+               /* Map the M64 here */
+               rc = opal_pci_set_phb_mem_window(phb->opal_id,
+                                                OPAL_M64_WINDOW_TYPE,
+                                                pdn->m64_wins[i],
+                                                res->start,
+                                                0, /* unused */
+                                                resource_size(res));
+               if (rc != OPAL_SUCCESS) {
+                       dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
+                               win, rc);
+                       goto m64_failed;
+               }
+
+               rc = opal_pci_phb_mmio_enable(phb->opal_id,
+                               OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 1);
+               if (rc != OPAL_SUCCESS) {
+                       dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
+                               win, rc);
+                       goto m64_failed;
+               }
+       }
+       return 0;
+
+m64_failed:
+       pnv_pci_vf_release_m64(pdev);
+       return -EBUSY;
+}
+
+static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct iommu_table    *tbl;
+       unsigned long         addr;
+       int64_t               rc;
+
+       bus = dev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+       tbl = pe->tce32_table;
+       addr = tbl->it_base;
+
+       opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+                                  pe->pe_number << 1, 1, __pa(addr),
+                                  0, 0x1000);
+
+       rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+                                       pe->pe_number,
+                                       (pe->pe_number << 1) + 1,
+                                       pe->tce_bypass_base,
+                                       0);
+       if (rc)
+               pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+
+       iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
+       free_pages(addr, get_order(TCE32_TABLE_SIZE));
+       pe->tce32_table = NULL;
+}
+
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct pnv_ioda_pe    *pe, *pe_n;
+       struct pci_dn         *pdn;
+
+       bus = pdev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+
+       if (!pdev->is_physfn)
+               return;
+
+       pdn = pci_get_pdn(pdev);
+       list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+               if (pe->parent_dev != pdev)
+                       continue;
+
+               pnv_pci_ioda2_release_dma_pe(pdev, pe);
+
+               /* Remove from list */
+               mutex_lock(&phb->ioda.pe_list_mutex);
+               list_del(&pe->list);
+               mutex_unlock(&phb->ioda.pe_list_mutex);
+
+               pnv_ioda_deconfigure_pe(phb, pe);
+
+               pnv_ioda_free_pe(phb, pe->pe_number);
+       }
+}
+
+void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct pci_dn         *pdn;
+       struct pci_sriov      *iov;
+       u16 num_vfs;
+
+       bus = pdev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+       pdn = pci_get_pdn(pdev);
+       iov = pdev->sriov;
+       num_vfs = pdn->num_vfs;
+
+       /* Release VF PEs */
+       pnv_ioda_release_vf_PE(pdev);
+
+       if (phb->type == PNV_PHB_IODA2) {
+               pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+
+               /* Release M64 windows */
+               pnv_pci_vf_release_m64(pdev);
+
+               /* Release PE numbers */
+               bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+               pdn->offset = 0;
+       }
+}
+
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+                                      struct pnv_ioda_pe *pe);
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct pnv_ioda_pe    *pe;
+       int                    pe_num;
+       u16                    vf_index;
+       struct pci_dn         *pdn;
+
+       bus = pdev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+       pdn = pci_get_pdn(pdev);
+
+       if (!pdev->is_physfn)
+               return;
+
+       /* Reserve PE for each VF */
+       for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+               pe_num = pdn->offset + vf_index;
+
+               pe = &phb->ioda.pe_array[pe_num];
+               pe->pe_number = pe_num;
+               pe->phb = phb;
+               pe->flags = PNV_IODA_PE_VF;
+               pe->pbus = NULL;
+               pe->parent_dev = pdev;
+               pe->tce32_seg = -1;
+               pe->mve_number = -1;
+               pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
+                          pci_iov_virtfn_devfn(pdev, vf_index);
+
+               pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
+                       hose->global_number, pdev->bus->number,
+                       PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+                       PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
+
+               if (pnv_ioda_configure_pe(phb, pe)) {
+                       /* XXX What do we do here ? */
+                       if (pe_num)
+                               pnv_ioda_free_pe(phb, pe_num);
+                       pe->pdev = NULL;
+                       continue;
+               }
+
+               pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
+                               GFP_KERNEL, hose->node);
+               pe->tce32_table->data = pe;
+
+               /* Put PE to the list */
+               mutex_lock(&phb->ioda.pe_list_mutex);
+               list_add_tail(&pe->list, &phb->ioda.pe_list);
+               mutex_unlock(&phb->ioda.pe_list_mutex);
+
+               pnv_pci_ioda2_setup_dma_pe(phb, pe);
+       }
+}
+
+int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+       struct pci_bus        *bus;
+       struct pci_controller *hose;
+       struct pnv_phb        *phb;
+       struct pci_dn         *pdn;
+       int                    ret;
+
+       bus = pdev->bus;
+       hose = pci_bus_to_host(bus);
+       phb = hose->private_data;
+       pdn = pci_get_pdn(pdev);
+
+       if (phb->type == PNV_PHB_IODA2) {
+               /* Calculate available PE for required VFs */
+               mutex_lock(&phb->ioda.pe_alloc_mutex);
+               pdn->offset = bitmap_find_next_zero_area(
+                       phb->ioda.pe_alloc, phb->ioda.total_pe,
+                       0, num_vfs, 0);
+               if (pdn->offset >= phb->ioda.total_pe) {
+                       mutex_unlock(&phb->ioda.pe_alloc_mutex);
+                       dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+                       pdn->offset = 0;
+                       return -EBUSY;
+               }
+               bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+               pdn->num_vfs = num_vfs;
+               mutex_unlock(&phb->ioda.pe_alloc_mutex);
+
+               /* Assign M64 window accordingly */
+               ret = pnv_pci_vf_assign_m64(pdev);
+               if (ret) {
+                       dev_info(&pdev->dev, "Not enough M64 window resources\n");
+                       goto m64_failed;
+               }
+
+               /*
+                * When using one M64 BAR to map one IOV BAR, we need to shift
+                * the IOV BAR according to the PE# allocated to the VFs.
+                * Otherwise, the PE# for the VF will conflict with others.
+                */
+               ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+               if (ret)
+                       goto m64_failed;
+       }
+
+       /* Setup VF PEs */
+       pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+       return 0;
+
+m64_failed:
+       bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+       pdn->offset = 0;
+
+       return ret;
+}
+
 int pcibios_sriov_disable(struct pci_dev *pdev)
 {
+       pnv_pci_sriov_disable(pdev);
+
        /* Release PCI data */
        remove_dev_pci_data(pdev);
        return 0;
@@ -990,6 +1469,8 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 {
        /* Allocate PCI data */
        add_dev_pci_data(pdev);
+
+       pnv_pci_sriov_enable(pdev, num_vfs);
        return 0;
 }
 #endif /* CONFIG_PCI_IOV */
@@ -1186,9 +1667,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
        int64_t rc;
        void *addr;
 
-       /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE       ((0x10000000 / 0x1000) * 8)
-
        /* XXX FIXME: Handle 64-bit only DMA devices */
        /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
        /* XXX FIXME: Allocate multi-level tables on PHB3 */
@@ -1251,12 +1729,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                 TCE_PCI_SWINV_PAIR);
        }
        iommu_init_table(tbl, phb->hose->node);
-       iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
-       if (pe->pdev)
+       if (pe->flags & PNV_IODA_PE_DEV) {
+               iommu_register_group(tbl, phb->hose->global_number,
+                                    pe->pe_number);
                set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-       else
+       } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
+               iommu_register_group(tbl, phb->hose->global_number,
+                                    pe->pe_number);
                pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+       } else if (pe->flags & PNV_IODA_PE_VF) {
+               iommu_register_group(tbl, phb->hose->global_number,
+                                    pe->pe_number);
+       }
 
        return;
  fail:
@@ -1383,12 +1868,19 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
        }
        iommu_init_table(tbl, phb->hose->node);
-       iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
-       if (pe->pdev)
+       if (pe->flags & PNV_IODA_PE_DEV) {
+               iommu_register_group(tbl, phb->hose->global_number,
+                                    pe->pe_number);
                set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-       else
+       } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
+               iommu_register_group(tbl, phb->hose->global_number,
+                                    pe->pe_number);
                pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+       } else if (pe->flags & PNV_IODA_PE_VF) {
+               iommu_register_group(tbl, phb->hose->global_number,
+                                    pe->pe_number);
+       }
 
        /* Also create a bypass window */
        if (!pnv_iommu_bypass_disabled)
@@ -2068,6 +2560,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
        phb->hub_id = hub_id;
        phb->opal_id = phb_id;
        phb->type = ioda_type;
+       mutex_init(&phb->ioda.pe_alloc_mutex);
 
        /* Detect specific models for error handling */
        if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
@@ -2127,6 +2620,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
        INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
        INIT_LIST_HEAD(&phb->ioda.pe_list);
+       mutex_init(&phb->ioda.pe_list_mutex);
 
        /* Calculate how many 32-bit TCE segments we have */
        phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
index 946aa3d62c3c7aa397a25936fc19d1b5a96eb3b1..02badcef5ceac52ff6adcdc2b7eda40f068f3db9 100644 (file)
@@ -714,6 +714,24 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 {
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
+#ifdef CONFIG_PCI_IOV
+       struct pnv_ioda_pe *pe;
+       struct pci_dn *pdn;
+
+       /* Fix the VF pdn PE number */
+       if (pdev->is_virtfn) {
+               pdn = pci_get_pdn(pdev);
+               WARN_ON(pdn->pe_number != IODA_INVALID_PE);
+               list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+                       if (pe->rid == ((pdev->bus->number << 8) |
+                           (pdev->devfn & 0xff))) {
+                               pdn->pe_number = pe->pe_number;
+                               pe->pdev = pdev;
+                               break;
+                       }
+               }
+       }
+#endif /* CONFIG_PCI_IOV */
 
        /* If we have no phb structure, try to setup a fallback based on
         * the device-tree (RTAS PCI for example)
index 84280474e18fac1aaa3d3ce331319c3e3c3e40ce..070ee888fc95cef223337e3bd945726ffc7ac7cc 100644 (file)
@@ -23,6 +23,7 @@ enum pnv_phb_model {
 #define PNV_IODA_PE_BUS_ALL    (1 << 2)        /* PE has subordinate buses     */
 #define PNV_IODA_PE_MASTER     (1 << 3)        /* Master PE in compound case   */
 #define PNV_IODA_PE_SLAVE      (1 << 4)        /* Slave PE in compound case    */
+#define PNV_IODA_PE_VF         (1 << 5)        /* PE for one VF                */
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_phb;
@@ -34,6 +35,9 @@ struct pnv_ioda_pe {
         * entire bus (& children). In the former case, pdev
         * is populated, in the later case, pbus is.
         */
+#ifdef CONFIG_PCI_IOV
+       struct pci_dev          *parent_dev;
+#endif
        struct pci_dev          *pdev;
        struct pci_bus          *pbus;
 
@@ -145,6 +149,8 @@ struct pnv_phb {
 
                        /* PE allocation bitmap */
                        unsigned long           *pe_alloc;
+                       /* PE allocation mutex */
+                       struct mutex            pe_alloc_mutex;
 
                        /* M32 & IO segment maps */
                        unsigned int            *m32_segmap;
@@ -159,6 +165,7 @@ struct pnv_phb {
                         * on the sequence of creation
                         */
                        struct list_head        pe_list;
+                       struct mutex            pe_list_mutex;
 
                        /* Reverse map of PEs, will have to extend if
                         * we are to support more than 256 PEs, indexed