diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 481da8f93fa449ac9cb3261755a3d1eeee152006..176f911ee983a6347b9643b925bfcfedb21b9809 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -139,44 +139,24 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
        return 0;
 }
 
-#ifdef CONFIG_PPC_64K_PAGES
-#define MMU_BASE_PSIZE MMU_PAGE_64K
-#else
-#define MMU_BASE_PSIZE MMU_PAGE_4K
-#endif
-
 static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
                                    unsigned int pshift)
 {
-       int psize = MMU_BASE_PSIZE;
-
-       if (pshift >= PUD_SHIFT)
-               psize = MMU_PAGE_1G;
-       else if (pshift >= PMD_SHIFT)
-               psize = MMU_PAGE_2M;
-       addr &= ~0xfffUL;
-       addr |= mmu_psize_defs[psize].ap << 5;
-       asm volatile("ptesync": : :"memory");
-       asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
-                    : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
-               asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
-                            : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-       asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+       unsigned long psize = PAGE_SIZE;
+
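+       /* A shift of 0 means a base (PAGE_SIZE) page */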
+       if (pshift)
+               psize = 1UL << pshift;
+
+       addr &= ~(psize - 1);
+       radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize);
 }
 
-static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+static void kvmppc_radix_flush_pwc(struct kvm *kvm)
 {
-       unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
-
-       asm volatile("ptesync": : :"memory");
-       /* RIC=1 PRS=0 R=1 IS=2 */
-       asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
-                    : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
-       asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+       radix__flush_pwc_lpid(kvm->arch.lpid);
 }
 
-unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
                                      unsigned long clr, unsigned long set,
                                      unsigned long addr, unsigned int shift)
 {
@@ -228,6 +208,167 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
        kmem_cache_free(kvm_pmd_cache, pmdp);
 }
 
+static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
+                            unsigned long gpa, unsigned int shift)
+{
+       unsigned long page_size = 1ul << shift;
+       unsigned long old;
+
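+       /*
+        * Clear the PTE and flush the TLB entry; if the page was dirty,
+        * record that in the memslot's dirty bitmap.
+        */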
+       old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
+       kvmppc_radix_tlbie_page(kvm, gpa, shift);
+       if (old & _PAGE_DIRTY) {
+               unsigned long gfn = gpa >> PAGE_SHIFT;
+               struct kvm_memory_slot *memslot;
+
+               memslot = gfn_to_memslot(kvm, gfn);
+               if (memslot && memslot->dirty_bitmap)
+                       kvmppc_update_dirty_map(memslot, gfn, page_size);
+       }
+}
+
+/*
+ * The kvmppc_unmap_free_p?d functions free existing page tables, recursively
+ * descending to clear and free child tables.
+ * Callers are responsible for flushing the PWC.
+ *
+ * When page tables are being unmapped/freed as part of the page fault path
+ * (full == false), valid ptes are not expected. There is code to unmap them
+ * and emit a warning if they are encountered, but data may already have been
+ * corrupted due to the unexpected mappings.
+ */
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
+{
+       if (full) {
+               memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+       } else {
+               pte_t *p = pte;
+               unsigned long it;
+
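+               /* No valid ptes are expected here; warn and unmap any we find */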
+               for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
+                       if (pte_val(*p) == 0)
+                               continue;
+                       WARN_ON_ONCE(1);
+                       kvmppc_unmap_pte(kvm, p,
+                                        pte_pfn(*p) << PAGE_SHIFT,
+                                        PAGE_SHIFT);
+               }
+       }
+
+       kvmppc_pte_free(pte);
+}
+
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
+{
+       unsigned long im;
+       pmd_t *p = pmd;
+
+       for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
+               if (!pmd_present(*p))
+                       continue;
+               if (pmd_is_leaf(*p)) {
+                       if (full) {
+                               pmd_clear(p);
+                       } else {
+                               WARN_ON_ONCE(1);
+                               kvmppc_unmap_pte(kvm, (pte_t *)p,
+                                        pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
+                                        PMD_SHIFT);
+                       }
+               } else {
+                       pte_t *pte;
+
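+                       /* Free the pte page below, then clear the entry */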
+                       pte = pte_offset_map(p, 0);
+                       kvmppc_unmap_free_pte(kvm, pte, full);
+                       pmd_clear(p);
+               }
+       }
+       kvmppc_pmd_free(pmd);
+}
+
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
+{
+       unsigned long iu;
+       pud_t *p = pud;
+
+       for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
+               if (!pud_present(*p))
+                       continue;
+               if (pud_huge(*p)) {
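+                       /* 1GB leaf entry: no lower-level table to free */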
+                       pud_clear(p);
+               } else {
+                       pmd_t *pmd;
+
+                       pmd = pmd_offset(p, 0);
+                       kvmppc_unmap_free_pmd(kvm, pmd, true);
+                       pud_clear(p);
+               }
+       }
+       pud_free(kvm->mm, pud);
+}
+
+void kvmppc_free_radix(struct kvm *kvm)
+{
+       unsigned long ig;
+       pgd_t *pgd;
+
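+       /* Walk the whole partition-scoped tree, freeing every level */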
+       if (!kvm->arch.pgtable)
+               return;
+       pgd = kvm->arch.pgtable;
+       for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+               pud_t *pud;
+
+               if (!pgd_present(*pgd))
+                       continue;
+               pud = pud_offset(pgd, 0);
+               kvmppc_unmap_free_pud(kvm, pud);
+               pgd_clear(pgd);
+       }
+       pgd_free(kvm->mm, kvm->arch.pgtable);
+       kvm->arch.pgtable = NULL;
+}
+
+static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
+                                             unsigned long gpa)
+{
+       pte_t *pte = pte_offset_kernel(pmd, 0);
+
+       /*
+        * Clearing the pmd entry then flushing the PWC ensures that the pte
+        * page will no longer be cached by the MMU, so it can be freed
+        * without flushing the PWC again.
+        */
+       pmd_clear(pmd);
+       kvmppc_radix_flush_pwc(kvm);
+
+       kvmppc_unmap_free_pte(kvm, pte, false);
+}
+
+static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
+                                       unsigned long gpa)
+{
+       pmd_t *pmd = pmd_offset(pud, 0);
+
+       /*
+        * Clearing the pud entry then flushing the PWC ensures that the pmd
+        * page and any children pte pages will no longer be cached by the MMU,
+        * so they can be freed without flushing the PWC again.
+        */
+       pud_clear(pud);
+       kvmppc_radix_flush_pwc(kvm);
+
+       kvmppc_unmap_free_pmd(kvm, pmd, false);
+}
+
+/*
+ * A number of bits may differ between different faults on the same partition-
+ * scope entry: the R and C bits change in the course of cleaning and aging,
+ * and the write bit can change either because the access was upgraded or
+ * because a read fault raced with a write fault that set those bits first.
+ */
+#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
                             unsigned int level, unsigned long mmu_seq)
 {
@@ -235,7 +376,6 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
        pud_t *pud, *new_pud = NULL;
        pmd_t *pmd, *new_pmd = NULL;
        pte_t *ptep, *new_ptep = NULL;
-       unsigned long old;
        int ret;
 
        /* Traverse the guest's 2nd-level tree, allocate new levels needed */
@@ -273,42 +413,39 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
        if (pud_huge(*pud)) {
                unsigned long hgpa = gpa & PUD_MASK;
 
+               /* Check if we raced and someone else has set the same thing */
+               if (level == 2) {
+                       if (pud_raw(*pud) == pte_raw(pte)) {
+                               ret = 0;
+                               goto out_unlock;
+                       }
+                       /* Valid 1GB page here already, add our extra bits */
+                       WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
+                                                       PTE_BITS_MUST_MATCH);
+                       kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+                                             0, pte_val(pte), hgpa, PUD_SHIFT);
+                       ret = 0;
+                       goto out_unlock;
+               }
                /*
                 * If we raced with another CPU which has just put
                 * a 1GB pte in after we saw a pmd page, try again.
                 */
-               if (level <= 1 && !new_pmd) {
+               if (!new_pmd) {
                        ret = -EAGAIN;
                        goto out_unlock;
                }
-               /* Check if we raced and someone else has set the same thing */
-               if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
-                       ret = 0;
-                       goto out_unlock;
-               }
                /* Valid 1GB page here already, remove it */
-               old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
-                                             ~0UL, 0, hgpa, PUD_SHIFT);
-               kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
-               if (old & _PAGE_DIRTY) {
-                       unsigned long gfn = hgpa >> PAGE_SHIFT;
-                       struct kvm_memory_slot *memslot;
-                       memslot = gfn_to_memslot(kvm, gfn);
-                       if (memslot && memslot->dirty_bitmap)
-                               kvmppc_update_dirty_map(memslot,
-                                                       gfn, PUD_SIZE);
-               }
+               kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
        }
        if (level == 2) {
                if (!pud_none(*pud)) {
                        /*
                         * There's a page table page here, but we wanted to
                         * install a large page, so remove and free the page
-                        * table page.  new_pmd will be NULL since level == 2.
+                        * table page.
                         */
-                       new_pmd = pmd_offset(pud, 0);
-                       pud_clear(pud);
-                       kvmppc_radix_flush_pwc(kvm, gpa);
+                       kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
                }
                kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
                ret = 0;
@@ -324,42 +461,40 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
        if (pmd_is_leaf(*pmd)) {
                unsigned long lgpa = gpa & PMD_MASK;
 
+               /* Check if we raced and someone else has set the same thing */
+               if (level == 1) {
+                       if (pmd_raw(*pmd) == pte_raw(pte)) {
+                               ret = 0;
+                               goto out_unlock;
+                       }
+                       /* Valid 2MB page here already, add our extra bits */
+                       WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
+                                                       PTE_BITS_MUST_MATCH);
+                       kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+                                             0, pte_val(pte), lgpa, PMD_SHIFT);
+                       ret = 0;
+                       goto out_unlock;
+               }
+
                /*
                 * If we raced with another CPU which has just put
                 * a 2MB pte in after we saw a pte page, try again.
                 */
-               if (level == 0 && !new_ptep) {
+               if (!new_ptep) {
                        ret = -EAGAIN;
                        goto out_unlock;
                }
-               /* Check if we raced and someone else has set the same thing */
-               if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
-                       ret = 0;
-                       goto out_unlock;
-               }
                /* Valid 2MB page here already, remove it */
-               old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
-                                             ~0UL, 0, lgpa, PMD_SHIFT);
-               kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
-               if (old & _PAGE_DIRTY) {
-                       unsigned long gfn = lgpa >> PAGE_SHIFT;
-                       struct kvm_memory_slot *memslot;
-                       memslot = gfn_to_memslot(kvm, gfn);
-                       if (memslot && memslot->dirty_bitmap)
-                               kvmppc_update_dirty_map(memslot,
-                                                       gfn, PMD_SIZE);
-               }
+               kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
        }
        if (level == 1) {
                if (!pmd_none(*pmd)) {
                        /*
                         * There's a page table page here, but we wanted to
                         * install a large page, so remove and free the page
-                        * table page.  new_ptep will be NULL since level == 1.
+                        * table page.
                         */
-                       new_ptep = pte_offset_kernel(pmd, 0);
-                       pmd_clear(pmd);
-                       kvmppc_radix_flush_pwc(kvm, gpa);
+                       kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
                }
                kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
                ret = 0;
@@ -378,12 +513,12 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
                        ret = 0;
                        goto out_unlock;
                }
-               /* PTE was previously valid, so invalidate it */
-               old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
-                                             0, gpa, 0);
-               kvmppc_radix_tlbie_page(kvm, gpa, 0);
-               if (old & _PAGE_DIRTY)
-                       mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+               /* Valid page here already, add our extra bits */
+               WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
+                                                       PTE_BITS_MUST_MATCH);
+               kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
+               ret = 0;
+               goto out_unlock;
        }
        kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
        ret = 0;
@@ -565,9 +700,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned long mask = (1ul << shift) - PAGE_SIZE;
                        pte = __pte(pte_val(pte) | (hva & mask));
                }
-               if (!(writing || upgrade_write))
-                       pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
-               pte = __pte(pte_val(pte) | _PAGE_EXEC);
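+               /*
+                * Always set EXEC and ACCESSED; set DIRTY only when installing
+                * a writable mapping, otherwise drop WRITE and DIRTY.
+                */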
+               pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+               if (writing || upgrade_write) {
+                       if (pte_val(pte) & _PAGE_WRITE)
+                               pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+               } else {
+                       pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+               }
        }
 
        /* Allocate space in the tree and write the PTE */
@@ -734,51 +873,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm)
        return 0;
 }
 
-void kvmppc_free_radix(struct kvm *kvm)
-{
-       unsigned long ig, iu, im;
-       pte_t *pte;
-       pmd_t *pmd;
-       pud_t *pud;
-       pgd_t *pgd;
-
-       if (!kvm->arch.pgtable)
-               return;
-       pgd = kvm->arch.pgtable;
-       for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
-               if (!pgd_present(*pgd))
-                       continue;
-               pud = pud_offset(pgd, 0);
-               for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
-                       if (!pud_present(*pud))
-                               continue;
-                       if (pud_huge(*pud)) {
-                               pud_clear(pud);
-                               continue;
-                       }
-                       pmd = pmd_offset(pud, 0);
-                       for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-                               if (pmd_is_leaf(*pmd)) {
-                                       pmd_clear(pmd);
-                                       continue;
-                               }
-                               if (!pmd_present(*pmd))
-                                       continue;
-                               pte = pte_offset_map(pmd, 0);
-                               memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
-                               kvmppc_pte_free(pte);
-                               pmd_clear(pmd);
-                       }
-                       kvmppc_pmd_free(pmd_offset(pud, 0));
-                       pud_clear(pud);
-               }
-               pud_free(kvm->mm, pud_offset(pgd, 0));
-               pgd_clear(pgd);
-       }
-       pgd_free(kvm->mm, kvm->arch.pgtable);
-       kvm->arch.pgtable = NULL;
-}
-
 static void pte_ctor(void *addr)
 {
        memset(addr, 0, RADIX_PTE_TABLE_SIZE);