mm: don't warn about allocations which stall for too long

[linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 805f30dd1c26ef3755b485b5452c589c0e00493d..bd1a686e40fe4c896bc66fe52b5d2e196f514651 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -82,6 +82,8 @@ DEFINE_PER_CPU(int, numa_node);
  EXPORT_PER_CPU_SYMBOL(numa_node);
  #endif
  
+DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);
+
  #ifdef CONFIG_HAVE_MEMORYLESS_NODES
  /*
   * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
@@ -1168,6 +1170,7 @@ static void free_one_page(struct zone *zone,
  static void __meminit __init_single_page(struct page *page, unsigned long pfn,
                                 unsigned long zone, int nid)
  {
+       mm_zero_struct_page(page);
         set_page_links(page, zone, nid, pfn);
         init_page_count(page);
         page_mapcount_reset(page);
@@ -1812,7 +1815,7 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
   * Go through the free lists for the given migratetype and remove
   * the smallest available page from the freelists
   */
-static inline
+static __always_inline
  struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                                                 int migratetype)
  {
@@ -1856,7 +1859,7 @@ static int fallbacks[MIGRATE_TYPES][4] = {
  };
  
  #ifdef CONFIG_CMA
-static struct page *__rmqueue_cma_fallback(struct zone *zone,
+static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone,
                                         unsigned int order)
  {
         return __rmqueue_smallest(zone, order, MIGRATE_CMA);
@@ -2237,7 +2240,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
   * deviation from the rest of this file, to make the for loop
   * condition simpler.
   */
-static inline bool
+static __always_inline bool
  __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
  {
         struct free_area *area;
@@ -2309,8 +2312,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
   * Do the hard work of removing an element from the buddy allocator.
   * Call me with the zone->lock already held.
   */
-static struct page *__rmqueue(struct zone *zone, unsigned int order,
-                               int migratetype)
+static __always_inline struct page *
+__rmqueue(struct zone *zone, unsigned int order, int migratetype)
  {
         struct page *page;
  
@@ -2335,7 +2338,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
   */
  static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         unsigned long count, struct list_head *list,
-                       int migratetype, bool cold)
+                       int migratetype)
  {
         int i, alloced = 0;
  
@@ -2349,19 +2352,16 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         continue;
  
                 /*
-                * Split buddy pages returned by expand() are received here
-                * in physical page order. The page is added to the callers and
-                * list and the list head then moves forward. From the callers
-                * perspective, the linked list is ordered by page number in
-                * some conditions. This is useful for IO devices that can
-                * merge IO requests if the physical pages are ordered
-                * properly.
+                * Split buddy pages returned by expand() are received here in
+                * physical page order. Each page is added to the tail of the
+                * caller's list, so from the caller's perspective the linked
+                * list is ordered by page number under some conditions.
+                * Walking the list forward from the head therefore also
+                * yields the pages in physical page order. This is useful
+                * for IO devices that can merge IO requests if the physical
+                * pages are ordered properly.
                  */
-               if (likely(!cold))
-                       list_add(&page->lru, list);
-               else
-                       list_add_tail(&page->lru, list);
-               list = &page->lru;
+               list_add_tail(&page->lru, list);
                 alloced++;
                 if (is_migrate_cma(get_pcppage_migratetype(page)))
                         __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
@@ -2610,24 +2610,25 @@ void mark_free_pages(struct zone *zone)
  }
  #endif /* CONFIG_PM */
  
-/*
- * Free a 0-order page
- * cold == true ? free a cold page : free a hot page
- */
-void free_hot_cold_page(struct page *page, bool cold)
+static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
  {
-       struct zone *zone = page_zone(page);
-       struct per_cpu_pages *pcp;
-       unsigned long flags;
-       unsigned long pfn = page_to_pfn(page);
         int migratetype;
  
         if (!free_pcp_prepare(page))
-               return;
+               return false; /* free_pcp_prepare() rejected the page */
  
         migratetype = get_pfnblock_migratetype(page, pfn);
         set_pcppage_migratetype(page, migratetype);
-       local_irq_save(flags);
+       return true; /* migratetype is now cached for the commit step */
+}
+
+static void free_unref_page_commit(struct page *page, unsigned long pfn)
+{
+       struct zone *zone = page_zone(page);
+       struct per_cpu_pages *pcp;
+       int migratetype;
+
+       migratetype = get_pcppage_migratetype(page);
         __count_vm_event(PGFREE);
  
         /*
@@ -2640,38 +2641,62 @@ void free_hot_cold_page(struct page *page, bool cold)
         if (migratetype >= MIGRATE_PCPTYPES) {
                 if (unlikely(is_migrate_isolate(migratetype))) {
                         free_one_page(zone, page, pfn, 0, migratetype);
-                       goto out;
+                       return;
                 }
                 migratetype = MIGRATE_MOVABLE;
         }
  
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
-       if (!cold)
-               list_add(&page->lru, &pcp->lists[migratetype]);
-       else
-               list_add_tail(&page->lru, &pcp->lists[migratetype]);
+       list_add(&page->lru, &pcp->lists[migratetype]);
         pcp->count++;
         if (pcp->count >= pcp->high) {
                 unsigned long batch = READ_ONCE(pcp->batch);
                 free_pcppages_bulk(zone, batch, pcp);
                 pcp->count -= batch;
         }
+}
  
-out:
+/*
+ * Free a 0-order page: prepare it, then commit it with local IRQs disabled.
+ */
+void free_unref_page(struct page *page)
+{
+       unsigned long flags;
+       unsigned long pfn = page_to_pfn(page);
+
+       if (!free_unref_page_prepare(page, pfn))
+               return; /* prepare step rejected the page; nothing to commit */
+
+       local_irq_save(flags);
+       free_unref_page_commit(page, pfn);
         local_irq_restore(flags);
  }
  
  /*
   * Free a list of 0-order pages
   */
-void free_hot_cold_page_list(struct list_head *list, bool cold)
+void free_unref_page_list(struct list_head *list)
  {
         struct page *page, *next;
+       unsigned long flags, pfn;
  
+       /* Prepare pages; unlink rejected ones and stash each pfn in page private */
+       list_for_each_entry_safe(page, next, list, lru) {
+               pfn = page_to_pfn(page);
+               if (!free_unref_page_prepare(page, pfn))
+                       list_del(&page->lru);
+               set_page_private(page, pfn); /* also runs for an unlinked page */
+       }
+
+       local_irq_save(flags); /* one IRQ-off section covers the whole list */
         list_for_each_entry_safe(page, next, list, lru) {
-               trace_mm_page_free_batched(page, cold);
-               free_hot_cold_page(page, cold);
+               unsigned long pfn = page_private(page); /* NOTE(review): shadows outer pfn */
+
+               set_page_private(page, 0); /* clear the stashed pfn before freeing */
+               trace_mm_page_free_batched(page);
+               free_unref_page_commit(page, pfn);
         }
+       local_irq_restore(flags);
  }
  
  /*
@@ -2754,6 +2779,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
  #ifdef CONFIG_NUMA
         enum numa_stat_item local_stat = NUMA_LOCAL;
  
+       /* skip numa counters update if numa stats is disabled */
+       if (!static_branch_likely(&vm_numa_stat_key))
+               return;
+
         if (z->node != numa_node_id())
                 local_stat = NUMA_OTHER;
  
@@ -2769,7 +2798,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
  
  /* Remove page from the per-cpu list, caller must protect the list */
  static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
-                       bool cold, struct per_cpu_pages *pcp,
+                       struct per_cpu_pages *pcp,
                         struct list_head *list)
  {
         struct page *page;
@@ -2778,16 +2807,12 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
                                         pcp->batch, list,
-                                       migratetype, cold);
+                                       migratetype);
                         if (unlikely(list_empty(list)))
                                 return NULL;
                 }
  
-               if (cold)
-                       page = list_last_entry(list, struct page, lru);
-               else
-                       page = list_first_entry(list, struct page, lru);
-
+               page = list_first_entry(list, struct page, lru);
                 list_del(&page->lru);
                 pcp->count--;
         } while (check_new_pcp(page));
@@ -2802,14 +2827,13 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
  {
         struct per_cpu_pages *pcp;
         struct list_head *list;
-       bool cold = ((gfp_flags & __GFP_COLD) != 0);
         struct page *page;
         unsigned long flags;
  
         local_irq_save(flags);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         list = &pcp->lists[migratetype];
-       page = __rmqueue_pcplist(zone,  migratetype, cold, pcp, list);
+       page = __rmqueue_pcplist(zone,  migratetype, pcp, list);
         if (page) {
                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                 zone_statistics(preferred_zone, zone);
@@ -3017,9 +3041,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                 if (!area->nr_free)
                         continue;
  
-               if (alloc_harder)
-                       return true;
-
                 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
                         if (!list_empty(&area->free_list[mt]))
                                 return true;
@@ -3031,6 +3052,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                         return true;
                 }
  #endif
+               if (alloc_harder &&
+                       !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+                       return true;
         }
         return false;
  }
@@ -3879,8 +3903,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         enum compact_result compact_result;
         int compaction_retries;
         int no_progress_loops;
-       unsigned long alloc_start = jiffies;
-       unsigned int stall_timeout = 10 * HZ;
         unsigned int cpuset_mems_cookie;
         int reserve_flags;
  
@@ -4012,14 +4034,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         if (!can_direct_reclaim)
                 goto nopage;
  
-       /* Make sure we know about allocations which stall for too long */
-       if (time_after(jiffies, alloc_start + stall_timeout)) {
-               warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
-                       "page allocation stalls for %ums, order:%u",
-                       jiffies_to_msecs(jiffies-alloc_start), order);
-               stall_timeout += 10 * HZ;
-       }
-
         /* Avoid recursion of direct reclaim */
         if (current->flags & PF_MEMALLOC)
                 goto nopage;
@@ -4270,7 +4284,7 @@ void __free_pages(struct page *page, unsigned int order)
  {
         if (put_page_testzero(page)) {
                 if (order == 0)
-                       free_hot_cold_page(page, false);
+                       free_unref_page(page);
                 else
                         __free_pages_ok(page, order);
         }
@@ -4328,7 +4342,7 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
                 unsigned int order = compound_order(page);
  
                 if (order == 0)
-                       free_hot_cold_page(page, false);
+                       free_unref_page(page);
                 else
                         __free_pages_ok(page, order);
         }
@@ -6215,6 +6229,44 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
         free_area_init_core(pgdat);
  }
  
+#ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * Only struct pages that are backed by physical memory are zeroed and
+ * initialized by going through __init_single_page(). However, some struct
+ * pages are reserved in the memblock allocator and their fields may still
+ * be accessed (for example, page_to_pfn() accesses flags on some
+ * configurations). Those struct pages must be zeroed explicitly.
+ */
+void __paginginit zero_resv_unavail(void)
+{
+       phys_addr_t start, end;
+       unsigned long pfn;
+       u64 i, pgcnt;
+
+       /*
+        * Loop through ranges that are reserved, but do not have reported
+        * physical memory backing, zeroing and counting every struct page.
+        */
+       pgcnt = 0;
+       for_each_resv_unavail_range(i, &start, &end) {
+               for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
+                       mm_zero_struct_page(pfn_to_page(pfn));
+                       pgcnt++;
+               }
+       }
+
+       /*
+        * Report struct pages that do not have backing memory. This could be
+        * because firmware is using some of this memory, or for other reasons.
+        * Once memblock is changed so that such behaviour is not allowed, i.e.
+        * the list of "reserved" memory must be a subset of the list of
+        * "memory", this code can be removed. pgcnt is a u64: print with %llu.
+        */
+       if (pgcnt)
+               pr_info("Reserved but unavailable: %llu pages\n", pgcnt);
+}
+#endif /* CONFIG_HAVE_MEMBLOCK */
+
  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  
  #if MAX_NUMNODES > 1
@@ -6638,6 +6690,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                         node_set_state(nid, N_MEMORY);
                 check_for_memory(pgdat, nid);
         }
+       zero_resv_unavail();
  }
  
  static int __init cmdline_parse_core(char *p, unsigned long *core)
@@ -6801,6 +6854,7 @@ void __init free_area_init(unsigned long *zones_size)
  {
         free_area_init_node(0, zones_size,
                         __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
+       zero_resv_unavail();
  }
  
  static int page_alloc_cpu_dead(unsigned int cpu)