X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=blobdiff_plain;f=mm%2Fhmm.c;h=b4e9afdc2181d4b634d33cb6fea4700349908afd;hb=44532d4c591c10d6907ac5030373bc306617d92b;hp=774d684fa2b470ed710440c8bdb4fc0efd412b72;hpb=53b3b6bbfde6aae8d1ededc86ad4e0e1e00eb5f8;p=linux.git

diff --git a/mm/hmm.c b/mm/hmm.c
index 774d684fa2b4..b4e9afdc2181 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * Authors: Jérôme Glisse <jglisse@redhat.com>
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
  */
 /*
  * Refer to include/linux/hmm.h for information about heterogeneous memory
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 	spin_lock_init(&hmm->lock);
 	hmm->mm = mm;
 
-	/*
-	 * We should only get here if hold the mmap_sem in write mode ie on
-	 * registration of first mirror through hmm_mirror_register()
-	 */
-	hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
-	if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
-		kfree(hmm);
-		return NULL;
-	}
-
 	spin_lock(&mm->page_table_lock);
 	if (!mm->hmm)
 		mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 		cleanup = true;
 	spin_unlock(&mm->page_table_lock);
 
-	if (cleanup) {
-		mmu_notifier_unregister(&hmm->mmu_notifier, mm);
-		kfree(hmm);
-	}
+	if (cleanup)
+		goto error;
+
+	/*
+	 * We should only get here if hold the mmap_sem in write mode ie on
+	 * registration of first mirror through hmm_mirror_register()
+	 */
+	hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+	if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+		goto error_mm;
 
 	return mm->hmm;
+
+error_mm:
+	spin_lock(&mm->page_table_lock);
+	if (mm->hmm == hmm)
+		mm->hmm = NULL;
+	spin_unlock(&mm->page_table_lock);
+error:
+	kfree(hmm);
+	return NULL;
 }
 
 void hmm_mm_destroy(struct mm_struct *mm)
@@ -121,10 +126,8 @@ void hmm_mm_destroy(struct mm_struct *mm)
 	kfree(mm->hmm);
 }
 
-static void hmm_invalidate_range(struct hmm *hmm,
-				 enum hmm_update_type action,
-				 unsigned long start,
-				 unsigned long end)
+static int hmm_invalidate_range(struct hmm *hmm,
+				const struct hmm_update *update)
 {
 	struct hmm_mirror *mirror;
 	struct hmm_range *range;
@@ -133,22 +136,30 @@ static void hmm_invalidate_range(struct hmm *hmm,
 	list_for_each_entry(range, &hmm->ranges, list) {
 		unsigned long addr, idx, npages;
 
-		if (end < range->start || start >= range->end)
+		if (update->end < range->start || update->start >= range->end)
 			continue;
 
 		range->valid = false;
-		addr = max(start, range->start);
+		addr = max(update->start, range->start);
 		idx = (addr - range->start) >> PAGE_SHIFT;
-		npages = (min(range->end, end) - addr) >> PAGE_SHIFT;
+		npages = (min(range->end, update->end) - addr) >> PAGE_SHIFT;
 		memset(&range->pfns[idx], 0, sizeof(*range->pfns) * npages);
 	}
 	spin_unlock(&hmm->lock);
 
 	down_read(&hmm->mirrors_sem);
-	list_for_each_entry(mirror, &hmm->mirrors, list)
-		mirror->ops->sync_cpu_device_pagetables(mirror, action,
-							start, end);
+	list_for_each_entry(mirror, &hmm->mirrors, list) {
+		int ret;
+
+		ret = mirror->ops->sync_cpu_device_pagetables(mirror, update);
+		if (!update->blockable && ret == -EAGAIN) {
+			up_read(&hmm->mirrors_sem);
+			return -EAGAIN;
+		}
+	}
 	up_read(&hmm->mirrors_sem);
+
+	return 0;
 }
 
 static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -197,11 +208,16 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 				     unsigned long start,
 				     unsigned long end)
 {
+	struct hmm_update update;
 	struct hmm *hmm = mm->hmm;
 
 	VM_BUG_ON(!hmm);
 
-	hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end);
+	update.start = start;
+	update.end = end;
+	update.event = HMM_UPDATE_INVALIDATE;
+	update.blockable = true;
+	hmm_invalidate_range(hmm, &update);
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
@@ -278,12 +294,13 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror)
 	if (!should_unregister || mm == NULL)
 		return;
 
+	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
 	spin_lock(&mm->page_table_lock);
 	if (mm->hmm == hmm)
 		mm->hmm = NULL;
 	spin_unlock(&mm->page_table_lock);
 
-	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
 	kfree(hmm);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
@@ -571,22 +588,42 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
 	uint64_t *pfns = range->pfns;
 	unsigned long addr = start, i;
 	pte_t *ptep;
+	pmd_t pmd;
 
-	i = (addr - range->start) >> PAGE_SHIFT;
 
 again:
-	if (pmd_none(*pmdp))
+	pmd = READ_ONCE(*pmdp);
+	if (pmd_none(pmd))
 		return hmm_vma_walk_hole(start, end, walk);
 
-	if (pmd_huge(*pmdp) && (range->vma->vm_flags & VM_HUGETLB))
+	if (pmd_huge(pmd) && (range->vma->vm_flags & VM_HUGETLB))
 		return hmm_pfns_bad(start, end, walk);
 
-	if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
-		pmd_t pmd;
-
+	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
+		bool fault, write_fault;
+		unsigned long npages;
+		uint64_t *pfns;
+
+		i = (addr - range->start) >> PAGE_SHIFT;
+		npages = (end - addr) >> PAGE_SHIFT;
+		pfns = &range->pfns[i];
+
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+				     0, &fault, &write_fault);
+		if (fault || write_fault) {
+			hmm_vma_walk->last = addr;
+			pmd_migration_entry_wait(vma->vm_mm, pmdp);
+			return -EAGAIN;
+		}
+		return 0;
+	} else if (!pmd_present(pmd))
+		return hmm_pfns_bad(start, end, walk);
+
+	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
 		/*
 		 * No need to take pmd_lock here, even if some other threads
 		 * is splitting the huge pmd we will get that event through
@@ -601,13 +638,21 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;
 
+		i = (addr - range->start) >> PAGE_SHIFT;
 		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
 	}
 
-	if (pmd_bad(*pmdp))
+	/*
+	 * We have handled all the valid case above ie either none, migration,
+	 * huge or transparent huge. At this point either it is a valid pmd
+	 * entry pointing to pte directory or it is a bad pmd that will not
+	 * recover.
+	 */
+	if (pmd_bad(pmd))
 		return hmm_pfns_bad(start, end, walk);
 
 	ptep = pte_offset_map(pmdp, addr);
+	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
 		int r;
 
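For context, the net driver-visible change above is that sync_cpu_device_pagetables() now receives a const struct hmm_update * and returns an int, where -EAGAIN only matters when update->blockable is false (the invalidate_range_end path in this patch always passes blockable = true, so the -EAGAIN handling is there for future non-blocking callers). Below is a minimal sketch of a conforming mirror callback; the my_mirror wrapper, its mutex, and the my_invalidate_range() helper are hypothetical illustrations, while struct hmm_update, its fields, HMM_UPDATE_INVALIDATE, and the -EAGAIN convention come from the patch itself:

#include <linux/kernel.h>
#include <linux/hmm.h>
#include <linux/mutex.h>

/* Hypothetical driver state wrapping an HMM mirror. */
struct my_mirror {
	struct hmm_mirror mirror;
	struct mutex lock;	/* serializes device page-table updates */
};

/* Hypothetical helper that zaps device PTEs covering [start, end). */
static void my_invalidate_range(struct my_mirror *m,
				unsigned long start, unsigned long end);

static int my_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
					 const struct hmm_update *update)
{
	struct my_mirror *m = container_of(mirror, struct my_mirror, mirror);

	/*
	 * When update->blockable is false the callback must not sleep;
	 * trylock and let hmm_invalidate_range() see -EAGAIN instead
	 * of blocking on the driver lock.
	 */
	if (!update->blockable) {
		if (!mutex_trylock(&m->lock))
			return -EAGAIN;
	} else {
		mutex_lock(&m->lock);
	}

	if (update->event == HMM_UPDATE_INVALIDATE)
		my_invalidate_range(m, update->start, update->end);

	mutex_unlock(&m->lock);
	return 0;
}

static const struct hmm_mirror_ops my_mirror_ops = {
	.sync_cpu_device_pagetables = my_sync_cpu_device_pagetables,
};

Registration is unchanged by this patch: the driver still points mirror->ops at its ops table and calls hmm_mirror_register(), which, per the comment in hmm_register() above, happens with the mmap_sem held in write mode for the first mirror.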