diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4390a8d5be41ee497569622e3b0381a851870114..8b920ce3ae02f206f8598d6510986ceef6f0440d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -116,6 +116,16 @@ struct scan_control {
 
        /* Number of pages freed so far during a call to shrink_zones() */
        unsigned long nr_reclaimed;
+
+       struct {
+               unsigned int dirty;
+               unsigned int unqueued_dirty;
+               unsigned int congested;
+               unsigned int writeback;
+               unsigned int immediate;
+               unsigned int file_taken;
+               unsigned int taken;
+       } nr;
 };
 
 #ifdef ARCH_HAS_PREFETCH
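The new sc->nr counters give shrink_node() a node-wide picture of what
shrink_inactive_list() encountered across all memcgs during one priority
iteration, so stall decisions can be made per node instead of per LRU list.
A condensed sketch of the pattern the rest of this diff establishes,
paraphrased from the later hunks rather than copied verbatim:

        /* cleared once per priority loop in shrink_node() ... */
        memset(&sc->nr, 0, sizeof(sc->nr));

        /* ... accumulated by every shrink_inactive_list() call ... */
        sc->nr.dirty     += stat.nr_dirty;
        sc->nr.writeback += stat.nr_writeback;
        sc->nr.taken     += nr_taken;
        if (file)
                sc->nr.file_taken += nr_taken;

        /* ... and evaluated by shrink_node() once the whole node was scanned */
        if (current_is_kswapd() &&
            sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
                set_bit(PGDAT_WRITEBACK, &pgdat->flags);
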
@@ -190,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc)
 #endif
        return false;
 }
+
+static void set_memcg_congestion(pg_data_t *pgdat,
+                               struct mem_cgroup *memcg,
+                               bool congested)
+{
+       struct mem_cgroup_per_node *mn;
+
+       if (!memcg)
+               return;
+
+       mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+       WRITE_ONCE(mn->congested, congested);
+}
+
+static bool memcg_congested(pg_data_t *pgdat,
+                       struct mem_cgroup *memcg)
+{
+       struct mem_cgroup_per_node *mn;
+
+       mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+       return READ_ONCE(mn->congested);
+
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
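set_memcg_congestion() and memcg_congested() record congestion per memcg and
per node (in mem_cgroup_per_node), so memcg reclaim no longer has to set bits
in the shared pgdat->flags. WRITE_ONCE()/READ_ONCE() are used because the flag
is read and written locklessly by concurrent reclaimers. A minimal usage
sketch, mirroring the call sites added further down in this diff:

        /* memcg reclaim marks the memcg/node pair instead of pgdat->flags ... */
        if (!global_reclaim(sc) && sane_reclaim(sc) &&
            sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
                set_memcg_congestion(pgdat, root, true);

        /* ... and do_try_to_free_pages() clears it when the reclaim pass ends */
        set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
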
@@ -200,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc)
 {
        return true;
 }
+
+static inline void set_memcg_congestion(struct pglist_data *pgdat,
+                               struct mem_cgroup *memcg, bool congested)
+{
+}
+
+static inline bool memcg_congested(struct pglist_data *pgdat,
+                       struct mem_cgroup *memcg)
+{
+       return false;
+
+}
 #endif
 
 /*
@@ -648,7 +693,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
        BUG_ON(!PageLocked(page));
        BUG_ON(mapping != page_mapping(page));
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        /*
         * The non racy check for a busy page.
         *
@@ -672,7 +717,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         * load is not satisfied before that of page->_refcount.
         *
         * Note that if SetPageDirty is always performed via set_page_dirty,
-        * and thus under tree_lock, then this ordering is not required.
+        * and thus under the i_pages lock, then this ordering is not required.
         */
        if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
                refcount = 1 + HPAGE_PMD_NR;
@@ -690,7 +735,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                swp_entry_t swap = { .val = page_private(page) };
                mem_cgroup_swapout(page, swap);
                __delete_from_swap_cache(page);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
                put_swap_page(page, swap);
        } else {
                void (*freepage)(struct page *);
@@ -711,13 +756,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * only page cache pages found in these are zero pages
                 * covering holes, and because we don't want to mix DAX
                 * exceptional entries and shadow exceptional entries in the
-                * same page_tree.
+                * same address_space.
                 */
                if (reclaimed && page_is_file_cache(page) &&
                    !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
 
                if (freepage != NULL)
                        freepage(page);
@@ -726,7 +771,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
        return 1;
 
 cannot_free:
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
        return 0;
 }
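The locking changes above are part of the mapping->tree_lock to i_pages
conversion: page cache entries are now protected by the lock embedded in the
mapping's i_pages structure, taken through the xa_lock_irqsave() and
xa_unlock_irqrestore() helpers, instead of a separate tree_lock spinlock.
The conversion in __remove_mapping() is mechanical, as in this sketch:

        unsigned long flags;

        /* was: spin_lock_irqsave(&mapping->tree_lock, flags); */
        xa_lock_irqsave(&mapping->i_pages, flags);

        /* ... remove the page from the page cache or swap cache ... */

        /* was: spin_unlock_irqrestore(&mapping->tree_lock, flags); */
        xa_unlock_irqrestore(&mapping->i_pages, flags);
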
 
@@ -857,17 +902,6 @@ static void page_check_dirty_writeback(struct page *page,
                mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
-struct reclaim_stat {
-       unsigned nr_dirty;
-       unsigned nr_unqueued_dirty;
-       unsigned nr_congested;
-       unsigned nr_writeback;
-       unsigned nr_immediate;
-       unsigned nr_activate;
-       unsigned nr_ref_keep;
-       unsigned nr_unmap_fail;
-};
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -926,7 +960,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
                /*
-                * The number of dirty pages determines if a zone is marked
+                * The number of dirty pages determines if a node is marked
                 * reclaim_congested which affects wait_iff_congested. kswapd
                 * will stall and start writing pages if the tail of the LRU
                 * is all dirty unqueued pages.
@@ -1754,23 +1788,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        mem_cgroup_uncharge_list(&page_list);
        free_unref_page_list(&page_list);
 
-       /*
-        * If reclaim is isolating dirty pages under writeback, it implies
-        * that the long-lived page allocation rate is exceeding the page
-        * laundering rate. Either the global limits are not being effective
-        * at throttling processes due to the page distribution throughout
-        * zones or there is heavy usage of a slow backing device. The
-        * only option is to throttle from reclaim context which is not ideal
-        * as there is no guarantee the dirtying process is throttled in the
-        * same way balance_dirty_pages() manages.
-        *
-        * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number
-        * of pages under pages flagged for immediate reclaim and stall if any
-        * are encountered in the nr_immediate check below.
-        */
-       if (stat.nr_writeback && stat.nr_writeback == nr_taken)
-               set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-
        /*
         * If dirty pages are scanned that are not queued for IO, it
         * implies that flushers are not doing their job. This can
@@ -1785,48 +1802,17 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        if (stat.nr_unqueued_dirty == nr_taken)
                wakeup_flusher_threads(WB_REASON_VMSCAN);
 
-       /*
-        * Legacy memcg will stall in page writeback so avoid forcibly
-        * stalling here.
-        */
-       if (sane_reclaim(sc)) {
-               /*
-                * Tag a zone as congested if all the dirty pages scanned were
-                * backed by a congested BDI and wait_iff_congested will stall.
-                */
-               if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
-                       set_bit(PGDAT_CONGESTED, &pgdat->flags);
-
-               /* Allow kswapd to start writing pages during reclaim. */
-               if (stat.nr_unqueued_dirty == nr_taken)
-                       set_bit(PGDAT_DIRTY, &pgdat->flags);
-
-               /*
-                * If kswapd scans pages marked marked for immediate
-                * reclaim and under writeback (nr_immediate), it implies
-                * that pages are cycling through the LRU faster than
-                * they are written so also forcibly stall.
-                */
-               if (stat.nr_immediate && current_may_throttle())
-                       congestion_wait(BLK_RW_ASYNC, HZ/10);
-       }
-
-       /*
-        * Stall direct reclaim for IO completions if underlying BDIs or zone
-        * is congested. Allow kswapd to continue until it starts encountering
-        * unqueued dirty pages or cycling through the LRU too quickly.
-        */
-       if (!sc->hibernation_mode && !current_is_kswapd() &&
-           current_may_throttle())
-               wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
+       sc->nr.dirty += stat.nr_dirty;
+       sc->nr.congested += stat.nr_congested;
+       sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
+       sc->nr.writeback += stat.nr_writeback;
+       sc->nr.immediate += stat.nr_immediate;
+       sc->nr.taken += nr_taken;
+       if (file)
+               sc->nr.file_taken += nr_taken;
 
        trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
-                       nr_scanned, nr_reclaimed,
-                       stat.nr_dirty,  stat.nr_writeback,
-                       stat.nr_congested, stat.nr_immediate,
-                       stat.nr_activate, stat.nr_ref_keep,
-                       stat.nr_unmap_fail,
-                       sc->priority, file);
+                       nr_scanned, nr_reclaimed, &stat, sc->priority, file);
        return nr_reclaimed;
 }
 
@@ -2507,6 +2493,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
        return true;
 }
 
+static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
+{
+       return test_bit(PGDAT_CONGESTED, &pgdat->flags) ||
+               (memcg && memcg_congested(pgdat, memcg));
+}
+
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
        struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2522,6 +2514,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                unsigned long node_lru_pages = 0;
                struct mem_cgroup *memcg;
 
+               memset(&sc->nr, 0, sizeof(sc->nr));
+
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
@@ -2536,7 +2530,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                        sc->memcg_low_skipped = 1;
                                        continue;
                                }
-                               mem_cgroup_event(memcg, MEMCG_LOW);
+                               memcg_memory_event(memcg, MEMCG_LOW);
                        }
 
                        reclaimed = sc->nr_reclaimed;
@@ -2587,6 +2581,67 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                if (sc->nr_reclaimed - nr_reclaimed)
                        reclaimable = true;
 
+               if (current_is_kswapd()) {
+                       /*
+                        * If reclaim is isolating dirty pages under writeback,
+                        * it implies that the long-lived page allocation rate
+                        * is exceeding the page laundering rate. Either the
+                        * global limits are not being effective at throttling
+                        * processes due to the page distribution throughout
+                        * zones or there is heavy usage of a slow backing
+                        * device. The only option is to throttle from reclaim
+                        * context which is not ideal as there is no guarantee
+                        * the dirtying process is throttled in the same way
+                        * balance_dirty_pages() manages.
+                        *
+                        * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+                        * count the number of pages under writeback that are
+                        * flagged for immediate reclaim and stall if any are
+                        * encountered in the nr_immediate check below.
+                        */
+                       if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+                               set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+
+                       /*
+                        * Tag a node as congested if all the dirty pages
+                        * scanned were backed by a congested BDI and
+                        * wait_iff_congested will stall.
+                        */
+                       if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+                               set_bit(PGDAT_CONGESTED, &pgdat->flags);
+
+                       /* Allow kswapd to start writing pages during reclaim.*/
+                       if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+                               set_bit(PGDAT_DIRTY, &pgdat->flags);
+
+                       /*
+                        * If kswapd scans pages marked for immediate
+                        * reclaim and under writeback (nr_immediate), it
+                        * implies that pages are cycling through the LRU
+                        * faster than they are written so also forcibly stall.
+                        */
+                       if (sc->nr.immediate)
+                               congestion_wait(BLK_RW_ASYNC, HZ/10);
+               }
+
+               /*
+                * Legacy memcg will stall in page writeback so avoid forcibly
+                * stalling in wait_iff_congested().
+                */
+               if (!global_reclaim(sc) && sane_reclaim(sc) &&
+                   sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+                       set_memcg_congestion(pgdat, root, true);
+
+               /*
+                * Stall direct reclaim for IO completions if the underlying
+                * BDIs and the node are congested. Allow kswapd to continue
+                * until it starts encountering unqueued dirty pages or
+                * cycling through the LRU too quickly.
+                */
+               if (!sc->hibernation_mode && !current_is_kswapd() &&
+                  current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+                       wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+
        } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
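Taken together, the block above splits the stalling that used to happen inside
shrink_inactive_list(): kswapd throttles itself when immediate-reclaim pages
are still under writeback, while direct reclaimers wait only if the node (or,
for cgroup reclaim, the memcg/node pair) has been marked congested. A
restatement of that control flow, not additional logic:

        if (current_is_kswapd()) {
                /* LRU is cycling faster than storage can write pages back */
                if (sc->nr.immediate)
                        congestion_wait(BLK_RW_ASYNC, HZ/10);
        } else if (!sc->hibernation_mode && current_may_throttle() &&
                   pgdat_memcg_congested(pgdat, root)) {
                /* direct reclaim waits for IO completion on a congested node */
                wait_iff_congested(BLK_RW_ASYNC, HZ/10);
        }
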
 
@@ -2802,6 +2857,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                        continue;
                last_pgdat = zone->zone_pgdat;
                snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+               set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
        }
 
        delayacct_freepages_end();
@@ -3808,7 +3864,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 
        if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
                /*
-                * Free memory by calling shrink zone with increasing
+                * Free memory by calling shrink node with increasing
                 * priorities until we have enough memory freed.
                 */
                do {