mm, oom: reorganize the oom report in dump_header
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e95b5b7c9c3d637efe29d07d86c75975acbfc500..a48db99da7b5997f1984e85a53d12a904ca6ee3f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -16,6 +16,7 @@
 
 #include <linux/stddef.h>
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
 #include <linux/pagemap.h>
@@ -121,10 +122,8 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 };
 EXPORT_SYMBOL(node_states);
 
-/* Protect totalram_pages and zone->managed_pages */
-static DEFINE_SPINLOCK(managed_page_count_lock);
-
-unsigned long totalram_pages __read_mostly;
+atomic_long_t _totalram_pages __read_mostly;
+EXPORT_SYMBOL(_totalram_pages);
 unsigned long totalreserve_pages __read_mostly;
 unsigned long totalcma_pages __read_mostly;
 
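
The spinlock-protected counters become atomic_long_t, and every reader later in this diff goes through small helpers such as totalram_pages(), totalram_pages_add() and zone_managed_pages(). Those helpers live in include/linux/mm.h and mmzone.h rather than in this file; a minimal sketch of what the call sites below assume (the exact definitions may differ):

        /* Sketch only: thin wrappers around the new atomic counters, as the
         * call sites later in this diff assume. */
        extern atomic_long_t _totalram_pages;

        static inline unsigned long totalram_pages(void)
        {
                return (unsigned long)atomic_long_read(&_totalram_pages);
        }

        static inline void totalram_pages_add(long count)
        {
                atomic_long_add(count, &_totalram_pages);
        }

        static inline unsigned long zone_managed_pages(struct zone *zone)
        {
                return (unsigned long)atomic_long_read(&zone->managed_pages);
        }
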
@@ -237,7 +236,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 #endif
 };
 
-char * const migratetype_names[MIGRATE_TYPES] = {
+const char * const migratetype_names[MIGRATE_TYPES] = {
        "Unmovable",
        "Movable",
        "Reclaimable",
@@ -263,6 +262,7 @@ compound_page_dtor * const compound_page_dtors[] = {
 
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
+int watermark_boost_factor __read_mostly = 15000;
 int watermark_scale_factor = 10;
 
 static unsigned long nr_kernel_pages __meminitdata;
@@ -1183,6 +1183,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
        init_page_count(page);
        page_mapcount_reset(page);
        page_cpupid_reset_last(page);
+       page_kasan_tag_reset(page);
 
        INIT_LIST_HEAD(&page->lru);
 #ifdef WANT_PAGE_VIRTUAL
@@ -1279,7 +1280,7 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order)
        __ClearPageReserved(p);
        set_page_count(p, 0);
 
-       page_zone(page)->managed_pages += nr_pages;
+       atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
        set_page_refcounted(page);
        __free_pages(page, order);
 }
@@ -1981,8 +1982,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
  */
 static int fallbacks[MIGRATE_TYPES][4] = {
        [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
-       [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
        [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
+       [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
 #ifdef CONFIG_CMA
        [MIGRATE_CMA]         = { MIGRATE_TYPES }, /* Never used */
 #endif
@@ -2129,6 +2130,21 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
        return false;
 }
 
+static inline void boost_watermark(struct zone *zone)
+{
+       unsigned long max_boost;
+
+       if (!watermark_boost_factor)
+               return;
+
+       max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
+                       watermark_boost_factor, 10000);
+       max_boost = max(pageblock_nr_pages, max_boost);
+
+       zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
+               max_boost);
+}
+
 /*
  * This function implements actual steal behaviour. If order is large enough,
  * we can steal whole pageblock. If not, we first move freepages in this
@@ -2138,7 +2154,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
  * itself, so pages freed in the future will be put on the correct free list.
  */
 static void steal_suitable_fallback(struct zone *zone, struct page *page,
-                                       int start_type, bool whole_block)
+               unsigned int alloc_flags, int start_type, bool whole_block)
 {
        unsigned int current_order = page_order(page);
        struct free_area *area;
@@ -2160,6 +2176,15 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
                goto single_page;
        }
 
+       /*
+        * Boost watermarks to increase reclaim pressure to reduce the
+        * likelihood of future fallbacks. Wake kswapd now as the node
+        * may be balanced overall and kswapd will not wake naturally.
+        */
+       boost_watermark(zone);
+       if (alloc_flags & ALLOC_KSWAPD)
+               wakeup_kswapd(zone, 0, 0, zone_idx(zone));
+
        /* We are not allowed to try stealing from the whole block */
        if (!whole_block)
                goto single_page;
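
boost_watermark() grows zone->watermark_boost by one pageblock per fragmenting fallback and caps it at watermark_boost_factor (in units of 1/10000) of the high watermark. A worked example with illustrative numbers, assuming x86-64 defaults (4K pages, pageblock_order 9, so pageblock_nr_pages == 512):

        /*
         * Worked example, illustrative numbers only (assumed x86-64 defaults,
         * pageblock_nr_pages == 512, watermark_boost_factor == 15000):
         *
         *   WMARK_HIGH = 10000 pages -> max_boost = 10000 * 15000 / 10000 = 15000
         *   1st fragmenting fallback: watermark_boost = min(0   + 512, 15000) = 512
         *   2nd fragmenting fallback: watermark_boost = min(512 + 512, 15000) = 1024
         *   ... the boost saturates at 15000 after roughly 30 such events, and
         *   __setup_per_zone_wmarks() (further down in this diff) resets it to 0
         *   whenever the base watermarks are recomputed.
         */
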
@@ -2258,7 +2283,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
         * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
         * Check is race-prone but harmless.
         */
-       max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+       max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
        if (zone->nr_reserved_highatomic >= max_managed)
                return;
 
@@ -2375,20 +2400,30 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * condition simpler.
  */
 static __always_inline bool
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+                                               unsigned int alloc_flags)
 {
        struct free_area *area;
        int current_order;
+       int min_order = order;
        struct page *page;
        int fallback_mt;
        bool can_steal;
 
+       /*
+        * Do not steal pages from freelists belonging to other pageblocks
+        * i.e. orders < pageblock_order. If there are no local zones free,
+        * the zonelists will be reiterated without ALLOC_NOFRAGMENT.
+        */
+       if (alloc_flags & ALLOC_NOFRAGMENT)
+               min_order = pageblock_order;
+
        /*
         * Find the largest available free page in the other list. This roughly
         * approximates finding the pageblock with the most free pages, which
         * would be too costly to do exactly.
         */
-       for (current_order = MAX_ORDER - 1; current_order >= order;
+       for (current_order = MAX_ORDER - 1; current_order >= min_order;
                                --current_order) {
                area = &(zone->free_area[current_order]);
                fallback_mt = find_suitable_fallback(area, current_order,
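
With ALLOC_NOFRAGMENT the search above never drops below pageblock_order: on a typical x86-64 configuration (MAX_ORDER 11, pageblock_order 9) only the order-9 and order-10 free areas of the other migratetypes are examined, so either a whole pageblock is stolen or nothing is, and get_page_from_freelist() moves on to the next local zone (eventually retrying without the flag) rather than splintering a foreign pageblock.
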
@@ -2433,7 +2468,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
        page = list_first_entry(&area->free_list[fallback_mt],
                                                        struct page, lru);
 
-       steal_suitable_fallback(zone, page, start_migratetype, can_steal);
+       steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
+                                                               can_steal);
 
        trace_mm_page_alloc_extfrag(page, order, current_order,
                start_migratetype, fallback_mt);
@@ -2447,7 +2483,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
  * Call me with the zone->lock already held.
  */
 static __always_inline struct page *
-__rmqueue(struct zone *zone, unsigned int order, int migratetype)
+__rmqueue(struct zone *zone, unsigned int order, int migratetype,
+                                               unsigned int alloc_flags)
 {
        struct page *page;
 
@@ -2457,7 +2494,8 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype)
                if (migratetype == MIGRATE_MOVABLE)
                        page = __rmqueue_cma_fallback(zone, order);
 
-               if (!page && __rmqueue_fallback(zone, order, migratetype))
+               if (!page && __rmqueue_fallback(zone, order, migratetype,
+                                                               alloc_flags))
                        goto retry;
        }
 
@@ -2472,13 +2510,14 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype)
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
                        unsigned long count, struct list_head *list,
-                       int migratetype)
+                       int migratetype, unsigned int alloc_flags)
 {
        int i, alloced = 0;
 
        spin_lock(&zone->lock);
        for (i = 0; i < count; ++i) {
-               struct page *page = __rmqueue(zone, order, migratetype);
+               struct page *page = __rmqueue(zone, order, migratetype,
+                                                               alloc_flags);
                if (unlikely(page == NULL))
                        break;
 
@@ -2934,6 +2973,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 
 /* Remove page from the per-cpu list, caller must protect the list */
 static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+                       unsigned int alloc_flags,
                        struct per_cpu_pages *pcp,
                        struct list_head *list)
 {
@@ -2943,7 +2983,7 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
                if (list_empty(list)) {
                        pcp->count += rmqueue_bulk(zone, 0,
                                        pcp->batch, list,
-                                       migratetype);
+                                       migratetype, alloc_flags);
                        if (unlikely(list_empty(list)))
                                return NULL;
                }
@@ -2959,7 +2999,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 /* Lock and remove page from the per-cpu list */
 static struct page *rmqueue_pcplist(struct zone *preferred_zone,
                        struct zone *zone, unsigned int order,
-                       gfp_t gfp_flags, int migratetype)
+                       gfp_t gfp_flags, int migratetype,
+                       unsigned int alloc_flags)
 {
        struct per_cpu_pages *pcp;
        struct list_head *list;
@@ -2969,7 +3010,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
        local_irq_save(flags);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        list = &pcp->lists[migratetype];
-       page = __rmqueue_pcplist(zone,  migratetype, pcp, list);
+       page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
        if (page) {
                __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                zone_statistics(preferred_zone, zone);
@@ -2992,7 +3033,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 
        if (likely(order == 0)) {
                page = rmqueue_pcplist(preferred_zone, zone, order,
-                               gfp_flags, migratetype);
+                               gfp_flags, migratetype, alloc_flags);
                goto out;
        }
 
@@ -3011,7 +3052,7 @@ struct page *rmqueue(struct zone *preferred_zone,
                                trace_mm_page_alloc_zone_locked(page, order, migratetype);
                }
                if (!page)
-                       page = __rmqueue(zone, order, migratetype);
+                       page = __rmqueue(zone, order, migratetype, alloc_flags);
        } while (page && check_new_pages(page, order));
        spin_unlock(&zone->lock);
        if (!page)
@@ -3253,6 +3294,41 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 }
 #endif /* CONFIG_NUMA */
 
+/*
+ * The restriction on ZONE_DMA32 as being a suitable zone to use to avoid
+ * fragmentation is subtle. If the preferred zone was HIGHMEM then
+ * premature use of a lower zone may cause lowmem pressure problems that
+ * are worse than fragmentation. If the next zone is ZONE_DMA then it is
+ * probably too small. It only makes sense to spread allocations to avoid
+ * fragmentation between the Normal and DMA32 zones.
+ */
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
+{
+       unsigned int alloc_flags = 0;
+
+       if (gfp_mask & __GFP_KSWAPD_RECLAIM)
+               alloc_flags |= ALLOC_KSWAPD;
+
+#ifdef CONFIG_ZONE_DMA32
+       if (zone_idx(zone) != ZONE_NORMAL)
+               goto out;
+
+       /*
+        * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
+        * the pointer is within zone->zone_pgdat->node_zones[]. Also assume
+        * on UMA that if Normal is populated then so is DMA32.
+        */
+       BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
+       if (nr_online_nodes > 1 && !populated_zone(--zone))
+               goto out;
+
+       alloc_flags |= ALLOC_NOFRAGMENT;
+out:
+#endif /* CONFIG_ZONE_DMA32 */
+       return alloc_flags;
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
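
The --zone in alloc_flags_nofragment() leans on node_zones[] being indexed by enum zone_type with ZONE_DMA32 sitting directly below ZONE_NORMAL, which the BUILD_BUG_ON asserts. Spelled out (sketch only):

        /*
         * Layout assumed by the --zone step above (sketch):
         *
         *   pgdat->node_zones[ZONE_DMA32]   <- populated_zone(--zone) checks this
         *   pgdat->node_zones[ZONE_NORMAL]  <- zone points here on entry
         *
         * i.e. for a ZONE_NORMAL pointer the decrement is equivalent to
         *   zone = &zone->zone_pgdat->node_zones[ZONE_DMA32];
         */
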
@@ -3261,14 +3336,18 @@ static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                                                const struct alloc_context *ac)
 {
-       struct zoneref *z = ac->preferred_zoneref;
+       struct zoneref *z;
        struct zone *zone;
        struct pglist_data *last_pgdat_dirty_limit = NULL;
+       bool no_fallback;
 
+retry:
        /*
         * Scan zonelist, looking for a zone with enough free.
         * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
         */
+       no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
+       z = ac->preferred_zoneref;
        for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
                                                                ac->nodemask) {
                struct page *page;
@@ -3307,7 +3386,23 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                        }
                }
 
-               mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
+               if (no_fallback && nr_online_nodes > 1 &&
+                   zone != ac->preferred_zoneref->zone) {
+                       int local_nid;
+
+                       /*
+                        * If moving to a remote node, retry but allow
+                        * fragmenting fallbacks. Locality is more important
+                        * than fragmentation avoidance.
+                        */
+                       local_nid = zone_to_nid(ac->preferred_zoneref->zone);
+                       if (zone_to_nid(zone) != local_nid) {
+                               alloc_flags &= ~ALLOC_NOFRAGMENT;
+                               goto retry;
+                       }
+               }
+
+               mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
                if (!zone_watermark_fast(zone, order, mark,
                                       ac_classzone_idx(ac), alloc_flags)) {
                        int ret;
@@ -3374,6 +3469,15 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                }
        }
 
+       /*
+        * It's possible on a UMA machine to get through all zones that are
+        * fragmented. If avoiding fragmentation, reset and try again.
+        */
+       if (no_fallback) {
+               alloc_flags &= ~ALLOC_NOFRAGMENT;
+               goto retry;
+       }
+
        return NULL;
 }
 
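
Taken together with the remote-node check above, the scan now makes two passes: first local zones only, refusing placements that would need a fragmenting fallback, then everything. A small self-contained toy model of that control flow, purely illustrative (the zone fields and helper names here are invented, not kernel API):

        #include <stdbool.h>
        #include <stdio.h>

        /* Invented toy type: each "zone" records whether it is node-local and
         * whether satisfying the request would need a fragmenting fallback. */
        struct toy_zone { const char *name; bool local; bool fragments; bool has_free; };

        static const char *toy_scan(const struct toy_zone *z, int n)
        {
                bool avoid_fragmentation = true;        /* models ALLOC_NOFRAGMENT */
        retry:
                for (int i = 0; i < n; i++) {
                        if (avoid_fragmentation && !z[i].local) {
                                avoid_fragmentation = false;    /* locality wins */
                                goto retry;
                        }
                        if (z[i].has_free && !(avoid_fragmentation && z[i].fragments))
                                return z[i].name;
                }
                if (avoid_fragmentation) {              /* all local zones fragmented */
                        avoid_fragmentation = false;
                        goto retry;
                }
                return "nothing";
        }

        int main(void)
        {
                const struct toy_zone zones[] = {
                        { "local Normal",  true,  true,  true },  /* would fragment */
                        { "local DMA32",   true,  false, true },
                        { "remote Normal", false, false, true },
                };
                printf("allocated from: %s\n", toy_scan(zones, 3));  /* -> local DMA32 */
                return 0;
        }
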
@@ -3413,13 +3517,13 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
-       pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n",
+       pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl",
                        current->comm, &vaf, gfp_mask, &gfp_mask,
                        nodemask_pr_args(nodemask));
        va_end(args);
 
        cpuset_print_current_mems_allowed();
-
+       pr_cont("\n");
        dump_stack();
        warn_alloc_show_mem(gfp_mask, nodemask);
 }
@@ -3861,6 +3965,9 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
        } else if (unlikely(rt_task(current)) && !in_interrupt())
                alloc_flags |= ALLOC_HARDER;
 
+       if (gfp_mask & __GFP_KSWAPD_RECLAIM)
+               alloc_flags |= ALLOC_KSWAPD;
+
 #ifdef CONFIG_CMA
        if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                alloc_flags |= ALLOC_CMA;
@@ -4092,7 +4199,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        if (!ac->preferred_zoneref->zone)
                goto nopage;
 
-       if (gfp_mask & __GFP_KSWAPD_RECLAIM)
+       if (alloc_flags & ALLOC_KSWAPD)
                wake_all_kswapds(order, gfp_mask, ac);
 
        /*
@@ -4150,7 +4257,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 retry:
        /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
-       if (gfp_mask & __GFP_KSWAPD_RECLAIM)
+       if (alloc_flags & ALLOC_KSWAPD)
                wake_all_kswapds(order, gfp_mask, ac);
 
        reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
@@ -4369,6 +4476,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 
        finalise_ac(gfp_mask, &ac);
 
+       /*
+        * Forbid the first pass from falling back to types that fragment
+        * memory until all local zones are considered.
+        */
+       alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
+
        /* First allocation attempt */
        page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
        if (likely(page))
@@ -4427,16 +4540,19 @@ unsigned long get_zeroed_page(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(get_zeroed_page);
 
-void __free_pages(struct page *page, unsigned int order)
+static inline void free_the_page(struct page *page, unsigned int order)
 {
-       if (put_page_testzero(page)) {
-               if (order == 0)
-                       free_unref_page(page);
-               else
-                       __free_pages_ok(page, order);
-       }
+       if (order == 0)         /* Via pcp? */
+               free_unref_page(page);
+       else
+               __free_pages_ok(page, order);
 }
 
+void __free_pages(struct page *page, unsigned int order)
+{
+       if (put_page_testzero(page))
+               free_the_page(page, order);
+}
 EXPORT_SYMBOL(__free_pages);
 
 void free_pages(unsigned long addr, unsigned int order)
@@ -4485,14 +4601,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
 {
        VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
 
-       if (page_ref_sub_and_test(page, count)) {
-               unsigned int order = compound_order(page);
-
-               if (order == 0)
-                       free_unref_page(page);
-               else
-                       __free_pages_ok(page, order);
-       }
+       if (page_ref_sub_and_test(page, count))
+               free_the_page(page, compound_order(page));
 }
 EXPORT_SYMBOL(__page_frag_cache_drain);
 
@@ -4558,7 +4668,7 @@ void page_frag_free(void *addr)
        struct page *page = virt_to_head_page(addr);
 
        if (unlikely(put_page_testzero(page)))
-               __free_pages_ok(page, compound_order(page));
+               free_the_page(page, compound_order(page));
 }
 EXPORT_SYMBOL(page_frag_free);
 
@@ -4660,7 +4770,7 @@ static unsigned long nr_free_zone_pages(int offset)
        struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
        for_each_zone_zonelist(zone, z, zonelist, offset) {
-               unsigned long size = zone->managed_pages;
+               unsigned long size = zone_managed_pages(zone);
                unsigned long high = high_wmark_pages(zone);
                if (size > high)
                        sum += size - high;
@@ -4712,7 +4822,7 @@ long si_mem_available(void)
                pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
        for_each_zone(zone)
-               wmark_low += zone->watermark[WMARK_LOW];
+               wmark_low += low_wmark_pages(zone);
 
        /*
         * Estimate the amount of memory available for userspace allocations,
@@ -4746,11 +4856,11 @@ EXPORT_SYMBOL_GPL(si_mem_available);
 
 void si_meminfo(struct sysinfo *val)
 {
-       val->totalram = totalram_pages;
+       val->totalram = totalram_pages();
        val->sharedram = global_node_page_state(NR_SHMEM);
        val->freeram = global_zone_page_state(NR_FREE_PAGES);
        val->bufferram = nr_blockdev_pages();
-       val->totalhigh = totalhigh_pages;
+       val->totalhigh = totalhigh_pages();
        val->freehigh = nr_free_highpages();
        val->mem_unit = PAGE_SIZE;
 }
@@ -4767,7 +4877,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
        pg_data_t *pgdat = NODE_DATA(nid);
 
        for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-               managed_pages += pgdat->node_zones[zone_type].managed_pages;
+               managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
        val->totalram = managed_pages;
        val->sharedram = node_page_state(pgdat, NR_SHMEM);
        val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
@@ -4776,7 +4886,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
                struct zone *zone = &pgdat->node_zones[zone_type];
 
                if (is_highmem(zone)) {
-                       managed_highpages += zone->managed_pages;
+                       managed_highpages += zone_managed_pages(zone);
                        free_highpages += zone_page_state(zone, NR_FREE_PAGES);
                }
        }
@@ -4983,7 +5093,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
                        K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
                        K(zone->present_pages),
-                       K(zone->managed_pages),
+                       K(zone_managed_pages(zone)),
                        K(zone_page_state(zone, NR_MLOCK)),
                        zone_page_state(zone, NR_KERNEL_STACK_KB),
                        K(zone_page_state(zone, NR_PAGETABLE)),
@@ -5655,7 +5765,7 @@ static int zone_batchsize(struct zone *zone)
         * The per-cpu-pages pools are set to around 1000th of the
         * size of the zone.
         */
-       batch = zone->managed_pages / 1024;
+       batch = zone_managed_pages(zone) / 1024;
        /* But no more than a meg. */
        if (batch * PAGE_SIZE > 1024 * 1024)
                batch = (1024 * 1024) / PAGE_SIZE;
@@ -5736,7 +5846,6 @@ static void pageset_init(struct per_cpu_pageset *p)
        memset(p, 0, sizeof(*p));
 
        pcp = &p->pcp;
-       pcp->count = 0;
        for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
                INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
@@ -5766,7 +5875,7 @@ static void pageset_set_high_and_batch(struct zone *zone,
 {
        if (percpu_pagelist_fraction)
                pageset_set_high(pcp,
-                       (zone->managed_pages /
+                       (zone_managed_pages(zone) /
                                percpu_pagelist_fraction));
        else
                pageset_set_batch(pcp, zone_batchsize(zone));
@@ -6323,7 +6432,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
                                                        unsigned long remaining_pages)
 {
-       zone->managed_pages = remaining_pages;
+       atomic_long_set(&zone->managed_pages, remaining_pages);
        zone_set_nid(zone, nid);
        zone->name = zone_names[idx];
        zone->zone_pgdat = NODE_DATA(nid);
@@ -7075,18 +7184,16 @@ early_param("movablecore", cmdline_parse_movablecore);
 
 void adjust_managed_page_count(struct page *page, long count)
 {
-       spin_lock(&managed_page_count_lock);
-       page_zone(page)->managed_pages += count;
-       totalram_pages += count;
+       atomic_long_add(count, &page_zone(page)->managed_pages);
+       totalram_pages_add(count);
 #ifdef CONFIG_HIGHMEM
        if (PageHighMem(page))
-               totalhigh_pages += count;
+               totalhigh_pages_add(count);
 #endif
-       spin_unlock(&managed_page_count_lock);
 }
 EXPORT_SYMBOL(adjust_managed_page_count);
 
-unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
+unsigned long free_reserved_area(void *start, void *end, int poison, const char *s)
 {
        void *pos;
        unsigned long pages = 0;
@@ -7123,9 +7230,9 @@ EXPORT_SYMBOL(free_reserved_area);
 void free_highmem_page(struct page *page)
 {
        __free_reserved_page(page);
-       totalram_pages++;
-       page_zone(page)->managed_pages++;
-       totalhigh_pages++;
+       totalram_pages_inc();
+       atomic_long_inc(&page_zone(page)->managed_pages);
+       totalhigh_pages_inc();
 }
 #endif
 
@@ -7174,10 +7281,10 @@ void __init mem_init_print_info(const char *str)
                physpages << (PAGE_SHIFT - 10),
                codesize >> 10, datasize >> 10, rosize >> 10,
                (init_data_size + init_code_size) >> 10, bss_size >> 10,
-               (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
+               (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
                totalcma_pages << (PAGE_SHIFT - 10),
 #ifdef CONFIG_HIGHMEM
-               totalhigh_pages << (PAGE_SHIFT - 10),
+               totalhigh_pages() << (PAGE_SHIFT - 10),
 #endif
                str ? ", " : "", str ? str : "");
 }
@@ -7257,6 +7364,7 @@ static void calculate_totalreserve_pages(void)
                for (i = 0; i < MAX_NR_ZONES; i++) {
                        struct zone *zone = pgdat->node_zones + i;
                        long max = 0;
+                       unsigned long managed_pages = zone_managed_pages(zone);
 
                        /* Find valid and maximum lowmem_reserve in the zone */
                        for (j = i; j < MAX_NR_ZONES; j++) {
@@ -7267,8 +7375,8 @@ static void calculate_totalreserve_pages(void)
                        /* we treat the high watermark as reserved pages. */
                        max += high_wmark_pages(zone);
 
-                       if (max > zone->managed_pages)
-                               max = zone->managed_pages;
+                       if (max > managed_pages)
+                               max = managed_pages;
 
                        pgdat->totalreserve_pages += max;
 
@@ -7292,7 +7400,7 @@ static void setup_per_zone_lowmem_reserve(void)
        for_each_online_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
                        struct zone *zone = pgdat->node_zones + j;
-                       unsigned long managed_pages = zone->managed_pages;
+                       unsigned long managed_pages = zone_managed_pages(zone);
 
                        zone->lowmem_reserve[j] = 0;
 
@@ -7310,7 +7418,7 @@ static void setup_per_zone_lowmem_reserve(void)
                                        lower_zone->lowmem_reserve[j] =
                                                managed_pages / sysctl_lowmem_reserve_ratio[idx];
                                }
-                               managed_pages += lower_zone->managed_pages;
+                               managed_pages += zone_managed_pages(lower_zone);
                        }
                }
        }
@@ -7329,14 +7437,14 @@ static void __setup_per_zone_wmarks(void)
        /* Calculate total number of !ZONE_HIGHMEM pages */
        for_each_zone(zone) {
                if (!is_highmem(zone))
-                       lowmem_pages += zone->managed_pages;
+                       lowmem_pages += zone_managed_pages(zone);
        }
 
        for_each_zone(zone) {
                u64 tmp;
 
                spin_lock_irqsave(&zone->lock, flags);
-               tmp = (u64)pages_min * zone->managed_pages;
+               tmp = (u64)pages_min * zone_managed_pages(zone);
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
                        /*
@@ -7350,15 +7458,15 @@ static void __setup_per_zone_wmarks(void)
                         */
                        unsigned long min_pages;
 
-                       min_pages = zone->managed_pages / 1024;
+                       min_pages = zone_managed_pages(zone) / 1024;
                        min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
-                       zone->watermark[WMARK_MIN] = min_pages;
+                       zone->_watermark[WMARK_MIN] = min_pages;
                } else {
                        /*
                         * If it's a lowmem zone, reserve a number of pages
                         * proportionate to the zone's size.
                         */
-                       zone->watermark[WMARK_MIN] = tmp;
+                       zone->_watermark[WMARK_MIN] = tmp;
                }
 
                /*
@@ -7367,11 +7475,12 @@ static void __setup_per_zone_wmarks(void)
                 * ensure a minimum size on small systems.
                 */
                tmp = max_t(u64, tmp >> 2,
-                           mult_frac(zone->managed_pages,
+                           mult_frac(zone_managed_pages(zone),
                                      watermark_scale_factor, 10000));
 
-               zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
-               zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
+               zone->_watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
+               zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
+               zone->watermark_boost = 0;
 
                spin_unlock_irqrestore(&zone->lock, flags);
        }
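
The stored array is now _watermark[] and every test goes through min_wmark_pages(), low_wmark_pages(), high_wmark_pages() or wmark_pages(), which is what folds the transient watermark_boost into each check (and why zone->watermark_boost is cleared here when the base watermarks are recomputed). The accessors live in include/linux/mmzone.h; roughly, as the call sites in this diff assume (sketch, exact form may differ):

        #define min_wmark_pages(z)  ((z)->_watermark[WMARK_MIN]  + (z)->watermark_boost)
        #define low_wmark_pages(z)  ((z)->_watermark[WMARK_LOW]  + (z)->watermark_boost)
        #define high_wmark_pages(z) ((z)->_watermark[WMARK_HIGH] + (z)->watermark_boost)
        #define wmark_pages(z, i)   ((z)->_watermark[i]          + (z)->watermark_boost)
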
@@ -7472,6 +7581,18 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
        return 0;
 }
 
+int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write,
+       void __user *buffer, size_t *length, loff_t *ppos)
+{
+       int rc;
+
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+       if (rc)
+               return rc;
+
+       return 0;
+}
+
 int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
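
The new handler only round-trips the value through proc_dointvec_minmax(); the factor takes effect lazily the next time boost_watermark() runs. The knob is reached via /proc/sys/vm/watermark_boost_factor through an entry in the kernel/sysctl.c vm_table, roughly of this shape (sketch, exact fields may differ):

        {
                .procname       = "watermark_boost_factor",
                .data           = &watermark_boost_factor,
                .maxlen         = sizeof(watermark_boost_factor),
                .mode           = 0644,
                .proc_handler   = watermark_boost_factor_sysctl_handler,
                .extra1         = &zero,        /* disallow negative values */
        },
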
@@ -7497,8 +7618,8 @@ static void setup_min_unmapped_ratio(void)
                pgdat->min_unmapped_pages = 0;
 
        for_each_zone(zone)
-               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
-                               sysctl_min_unmapped_ratio) / 100;
+               zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
+                                                        sysctl_min_unmapped_ratio) / 100;
 }
 
 
@@ -7525,8 +7646,8 @@ static void setup_min_slab_ratio(void)
                pgdat->min_slab_pages = 0;
 
        for_each_zone(zone)
-               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
-                               sysctl_min_slab_ratio) / 100;
+               zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
+                                                    sysctl_min_slab_ratio) / 100;
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
@@ -7766,8 +7887,7 @@ void *__init alloc_large_system_hash(const char *tablename,
  * race condition. So you can't expect this function should be exact.
  */
 bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
-                        int migratetype,
-                        bool skip_hwpoisoned_pages)
+                        int migratetype, int flags)
 {
        unsigned long pfn, iter, found;
 
@@ -7841,7 +7961,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                 * The HWPoisoned page may be not in buddy system, and
                 * page_count() is not 0.
                 */
-               if (skip_hwpoisoned_pages && PageHWPoison(page))
+               if ((flags & SKIP_HWPOISON) && PageHWPoison(page))
                        continue;
 
                if (__PageMovable(page))
@@ -7868,6 +7988,8 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
        return false;
 unmovable:
        WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
+       if (flags & REPORT_FAILURE)
+               dump_page(pfn_to_page(pfn+iter), "unmovable page");
        return true;
 }
 
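
has_unmovable_pages() now takes a flags bitmask instead of a single bool, so callers can independently ask for hwpoisoned pages to be tolerated and for the offending page to be dumped on failure. The bits themselves are defined outside this file (include/linux/page-isolation.h in this series); a minimal sketch:

        /* Sketch of the isolation flags assumed above; values are illustrative. */
        #define SKIP_HWPOISON   0x1     /* tolerate hwpoisoned pages in the range */
        #define REPORT_FAILURE  0x2     /* dump_page() whatever blocked isolation */
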
@@ -7994,8 +8116,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
         */
 
        ret = start_isolate_page_range(pfn_max_align_down(start),
-                                      pfn_max_align_up(end), migratetype,
-                                      false);
+                                      pfn_max_align_up(end), migratetype, 0);
        if (ret)
                return ret;