Merge tag 'y2038-alsa-v8-signed' of git://git.kernel.org:/pub/scm/linux/kernel/git...

[linux.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index b45a95363a844a12f54eb904437f3fd562057b50..ac65bb5e38ac267dec41c3e09bdaa53501da4c9a 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -244,16 +244,66 @@ struct file_region {
         long to;
  };
  
+/* Must be called with resv->lock held.  Returns the number of huge pages
+ * that [f, t) adds to the map beyond what existing regions already cover.
+ * With count_only == true the linked list is not modified.  Otherwise the
+ * region located for f is grown to the merged range and any other region
+ * it swallows is freed, so the caller must ensure such a region exists.
+ */
+static long add_reservation_in_range(struct resv_map *resv, long f, long t,
+                                    bool count_only)
+{
+       long chg = 0;
+       struct list_head *head = &resv->regions;
+       struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+
+       /* Locate the region we are before or in. */
+       list_for_each_entry(rg, head, link)
+               if (f <= rg->to)
+                       break;
+
+       /* Round our left edge to the current segment if it encloses us.
+        * An exhausted walk leaves rg at the list head: not a region.
+        */
+       if (&rg->link != head && f > rg->from)
+               f = rg->from;
+       chg = t - f;
+
+       /* Check for and consume any regions we now overlap with. */
+       nrg = rg;
+       list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
+               if (&rg->link == head || rg->from > t)
+                       break;
+
+               /* We overlap with this area; if it extends further than
+                * us, extend ourselves.  Discount its existing reservation.
+                */
+               if (rg->to > t) {
+                       chg += rg->to - t;
+                       t = rg->to;
+               }
+               chg -= rg->to - rg->from;
+
+               if (!count_only && rg != nrg) {
+                       list_del(&rg->link);
+                       kfree(rg);
+               }
+       }
+
+       if (!count_only) {
+               nrg->from = f;
+               nrg->to = t;
+       }
+
+       return chg;
+}
+
  /*
   * Add the huge page range represented by [f, t) to the reserve
- * map.  In the normal case, existing regions will be expanded
- * to accommodate the specified range.  Sufficient regions should
- * exist for expansion due to the previous call to region_chg
- * with the same range.  However, it is possible that region_del
- * could have been called after region_chg and modifed the map
- * in such a way that no region exists to be expanded.  In this
- * case, pull a region descriptor from the cache associated with
- * the map and use that for the new range.
+ * map.  Existing regions will be expanded to accommodate the specified
+ * range, or a region will be taken from the cache.  Sufficient regions
+ * must exist in the cache due to the previous call to region_chg with
+ * the same range.
   *
   * Return the number of new huge pages added to the map.  This
   * number is greater than or equal to zero.
@@ -261,7 +311,7 @@ struct file_region {
  static long region_add(struct resv_map *resv, long f, long t)
  {
         struct list_head *head = &resv->regions;
-       struct file_region *rg, *nrg, *trg;
+       struct file_region *rg, *nrg;
         long add = 0;
  
         spin_lock(&resv->lock);
@@ -272,9 +322,8 @@ static long region_add(struct resv_map *resv, long f, long t)
  
         /*
          * If no region exists which can be expanded to include the
-        * specified range, the list must have been modified by an
-        * interleving call to region_del().  Pull a region descriptor
-        * from the cache and use it for this range.
+        * specified range, pull a region descriptor from the cache
+        * and use it for this range.
          */
         if (&rg->link == head || t < rg->from) {
                 VM_BUG_ON(resv->region_cache_count <= 0);
@@ -292,38 +341,7 @@ static long region_add(struct resv_map *resv, long f, long t)
                 goto out_locked;
         }
  
-       /* Round our left edge to the current segment if it encloses us. */
-       if (f > rg->from)
-               f = rg->from;
-
-       /* Check for and consume any regions we now overlap with. */
-       nrg = rg;
-       list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
-               if (&rg->link == head)
-                       break;
-               if (rg->from > t)
-                       break;
-
-               /* If this area reaches higher then extend our area to
-                * include it completely.  If this is not the first area
-                * which we intend to reuse, free it. */
-               if (rg->to > t)
-                       t = rg->to;
-               if (rg != nrg) {
-                       /* Decrement return value by the deleted range.
-                        * Another range will span this area so that by
-                        * end of routine add will be >= zero
-                        */
-                       add -= (rg->to - rg->from);
-                       list_del(&rg->link);
-                       kfree(rg);
-               }
-       }
-
-       add += (nrg->from - f);         /* Added to beginning of region */
-       nrg->from = f;
-       add += t - nrg->to;             /* Added to end of region */
-       nrg->to = t;
+       add = add_reservation_in_range(resv, f, t, false);
  
  out_locked:
         resv->adds_in_progress--;
@@ -339,15 +357,9 @@ static long region_add(struct resv_map *resv, long f, long t)
   * call to region_add that will actually modify the reserve
   * map to add the specified range [f, t).  region_chg does
   * not change the number of huge pages represented by the
- * map.  However, if the existing regions in the map can not
- * be expanded to represent the new range, a new file_region
- * structure is added to the map as a placeholder.  This is
- * so that the subsequent region_add call will have all the
- * regions it needs and will not fail.
- *
- * Upon entry, region_chg will also examine the cache of region descriptors
- * associated with the map.  If there are not enough descriptors cached, one
- * will be allocated for the in progress add operation.
+ * map.  A new file_region structure is added to the cache
+ * as a placeholder, so that the subsequent region_add
+ * call will have all the regions it needs and will not fail.
   *
   * Returns the number of huge pages that need to be added to the existing
   * reservation map for the range [f, t).  This number is greater or equal to
@@ -356,11 +368,8 @@ static long region_add(struct resv_map *resv, long f, long t)
   */
  static long region_chg(struct resv_map *resv, long f, long t)
  {
-       struct list_head *head = &resv->regions;
-       struct file_region *rg, *nrg = NULL;
         long chg = 0;
  
-retry:
         spin_lock(&resv->lock);
  retry_locked:
         resv->adds_in_progress++;
@@ -378,10 +387,8 @@ static long region_chg(struct resv_map *resv, long f, long t)
                 spin_unlock(&resv->lock);
  
                 trg = kmalloc(sizeof(*trg), GFP_KERNEL);
-               if (!trg) {
-                       kfree(nrg);
+               if (!trg)
                         return -ENOMEM;
-               }
  
                 spin_lock(&resv->lock);
                 list_add(&trg->link, &resv->region_cache);
@@ -389,61 +396,8 @@ static long region_chg(struct resv_map *resv, long f, long t)
                 goto retry_locked;
         }
  
-       /* Locate the region we are before or in. */
-       list_for_each_entry(rg, head, link)
-               if (f <= rg->to)
-                       break;
+       chg = add_reservation_in_range(resv, f, t, true);
  
-       /* If we are below the current region then a new region is required.
-        * Subtle, allocate a new region at the position but make it zero
-        * size such that we can guarantee to record the reservation. */
-       if (&rg->link == head || t < rg->from) {
-               if (!nrg) {
-                       resv->adds_in_progress--;
-                       spin_unlock(&resv->lock);
-                       nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
-                       if (!nrg)
-                               return -ENOMEM;
-
-                       nrg->from = f;
-                       nrg->to   = f;
-                       INIT_LIST_HEAD(&nrg->link);
-                       goto retry;
-               }
-
-               list_add(&nrg->link, rg->link.prev);
-               chg = t - f;
-               goto out_nrg;
-       }
-
-       /* Round our left edge to the current segment if it encloses us. */
-       if (f > rg->from)
-               f = rg->from;
-       chg = t - f;
-
-       /* Check for and consume any regions we now overlap with. */
-       list_for_each_entry(rg, rg->link.prev, link) {
-               if (&rg->link == head)
-                       break;
-               if (rg->from > t)
-                       goto out;
-
-               /* We overlap with this area, if it extends further than
-                * us then we must extend ourselves.  Account for its
-                * existing reservation. */
-               if (rg->to > t) {
-                       chg += rg->to - t;
-                       t = rg->to;
-               }
-               chg -= rg->to - rg->from;
-       }
-
-out:
-       spin_unlock(&resv->lock);
-       /*  We already know we raced and no longer need the new region */
-       kfree(nrg);
-       return chg;
-out_nrg:
         spin_unlock(&resv->lock);
         return chg;
  }
@@ -1069,85 +1023,12 @@ static void free_gigantic_page(struct page *page, unsigned int order)
  }
  
  #ifdef CONFIG_CONTIG_ALLOC
-static int __alloc_gigantic_page(unsigned long start_pfn,
-                               unsigned long nr_pages, gfp_t gfp_mask)
-{
-       unsigned long end_pfn = start_pfn + nr_pages;
-       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
-                                 gfp_mask);
-}
-
-static bool pfn_range_valid_gigantic(struct zone *z,
-                       unsigned long start_pfn, unsigned long nr_pages)
-{
-       unsigned long i, end_pfn = start_pfn + nr_pages;
-       struct page *page;
-
-       for (i = start_pfn; i < end_pfn; i++) {
-               page = pfn_to_online_page(i);
-               if (!page)
-                       return false;
-
-               if (page_zone(page) != z)
-                       return false;
-
-               if (PageReserved(page))
-                       return false;
-
-               if (page_count(page) > 0)
-                       return false;
-
-               if (PageHuge(page))
-                       return false;
-       }
-
-       return true;
-}
-
-static bool zone_spans_last_pfn(const struct zone *zone,
-                       unsigned long start_pfn, unsigned long nr_pages)
-{
-       unsigned long last_pfn = start_pfn + nr_pages - 1;
-       return zone_spans_pfn(zone, last_pfn);
-}
-
  static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
                 int nid, nodemask_t *nodemask)
  {
-       unsigned int order = huge_page_order(h);
-       unsigned long nr_pages = 1 << order;
-       unsigned long ret, pfn, flags;
-       struct zonelist *zonelist;
-       struct zone *zone;
-       struct zoneref *z;
+       unsigned long nr_pages = 1UL << huge_page_order(h);
  
-       zonelist = node_zonelist(nid, gfp_mask);
-       for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) {
-               spin_lock_irqsave(&zone->lock, flags);
-
-               pfn = ALIGN(zone->zone_start_pfn, nr_pages);
-               while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
-                       if (pfn_range_valid_gigantic(zone, pfn, nr_pages)) {
-                               /*
-                                * We release the zone lock here because
-                                * alloc_contig_range() will also lock the zone
-                                * at some point. If there's an allocation
-                                * spinning on this lock, it may win the race
-                                * and cause alloc_contig_range() to fail...
-                                */
-                               spin_unlock_irqrestore(&zone->lock, flags);
-                               ret = __alloc_gigantic_page(pfn, nr_pages, gfp_mask);
-                               if (!ret)
-                                       return pfn_to_page(pfn);
-                               spin_lock_irqsave(&zone->lock, flags);
-                       }
-                       pfn += nr_pages;
-               }
-
-               spin_unlock_irqrestore(&zone->lock, flags);
-       }
-
-       return NULL;
+       return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
  }
  
  static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
@@ -3915,7 +3796,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                          * handling userfault.  Reacquire after handling
                          * fault to make calling code simpler.
                          */
-                       hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr);
+                       hash = hugetlb_fault_mutex_hash(mapping, idx);
                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                         ret = handle_userfault(&vmf, VM_UFFD_MISSING);
                         mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -4042,8 +3923,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
  }
  
  #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
-                           pgoff_t idx, unsigned long address)
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
  {
         unsigned long key[2];
         u32 hash;
@@ -4051,7 +3931,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
         key[0] = (unsigned long) mapping;
         key[1] = idx;
  
-       hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
+       hash = jhash2((u32 *)&key, sizeof(key)/(sizeof(u32)), 0);
  
         return hash & (num_fault_mutexes - 1);
  }
@@ -4060,8 +3940,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
   * For uniprocessor systems we always use a single mutex, so just
   * return 0 and avoid the hashing overhead.
   */
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
-                           pgoff_t idx, unsigned long address)
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
  {
         return 0;
  }
@@ -4105,7 +3984,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
          * get spurious allocation failures if two CPUs race to instantiate
          * the same page in the page cache.
          */
-       hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr);
+       hash = hugetlb_fault_mutex_hash(mapping, idx);
         mutex_lock(&hugetlb_fault_mutex_table[hash]);
  
         entry = huge_ptep_get(ptep);
@@ -4459,6 +4338,21 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                 break;
                         }
                 }
+
+               /*
+                * If subpage information is not requested, update counters
+                * and skip the same_page loop below.
+                */
+               if (!pages && !vmas && !pfn_offset &&
+                   (vaddr + huge_page_size(h) < vma->vm_end) &&
+                   (remainder >= pages_per_huge_page(h))) {
+                       vaddr += huge_page_size(h);
+                       remainder -= pages_per_huge_page(h);
+                       i += pages_per_huge_page(h);
+                       spin_unlock(ptl);
+                       continue;
+               }
+
  same_page:
                 if (pages) {
                         pages[i] = mem_map_offset(page, pfn_offset);
@@ -4842,7 +4736,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
         if (!vma_shareable(vma, addr))
                 return (pte_t *)pmd_alloc(mm, pud, addr);
  
-       i_mmap_lock_write(mapping);
+       i_mmap_lock_read(mapping);
         vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
                 if (svma == vma)
                         continue;
@@ -4872,7 +4766,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
         spin_unlock(ptl);
  out:
         pte = (pte_t *)pmd_alloc(mm, pud, addr);
-       i_mmap_unlock_write(mapping);
+       i_mmap_unlock_read(mapping);
         return pte;
  }