X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=4911754c93b7ed956726db7906d047f3579214b3;hb=649775be63c8b2e0b56ecc5bbc96d38205ec5259;hp=dbdc46a84f63089de614a2807ef62a70f7c14968;hpb=1b531e55c52bbe2927e2c3490d47c82113d303e7;p=linux.git diff --git a/mm/vmscan.c b/mm/vmscan.c index dbdc46a84f63..4911754c93b7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -88,9 +88,6 @@ struct scan_control { /* Can pages be swapped as part of reclaim? */ unsigned int may_swap:1; - /* e.g. boosted watermark reclaim leaves slabs alone */ - unsigned int may_shrinkslab:1; - /* * Cgroups are not reclaimed below their configured memory.low, * unless we threaten to OOM. If any cgroups are skipped due to @@ -174,11 +171,22 @@ int vm_swappiness = 60; */ unsigned long vm_total_pages; +static void set_task_reclaim_state(struct task_struct *task, + struct reclaim_state *rs) +{ + /* Check for an overwrite */ + WARN_ON_ONCE(rs && task->reclaim_state); + + /* Check for the nulling of an already-nulled member */ + WARN_ON_ONCE(!rs && !task->reclaim_state); + + task->reclaim_state = rs; +} + static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_MEMCG_KMEM - +#ifdef CONFIG_MEMCG /* * We allow subsystems to populate their shrinker-related * LRU lists before register_shrinker_prepared() is called @@ -230,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker) idr_remove(&shrinker_idr, id); up_write(&shrinker_rwsem); } -#else /* CONFIG_MEMCG_KMEM */ -static int prealloc_memcg_shrinker(struct shrinker *shrinker) -{ - return 0; -} -static void unregister_memcg_shrinker(struct shrinker *shrinker) -{ -} -#endif /* CONFIG_MEMCG_KMEM */ - -static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) -{ - /* Check for an overwrite */ - WARN_ON_ONCE(rs && task->reclaim_state); - - /* Check for the nulling of an already-nulled member */ - WARN_ON_ONCE(!rs && !task->reclaim_state); - - task->reclaim_state = rs; -} - -#ifdef CONFIG_MEMCG static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; @@ -308,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat, } #else +static int prealloc_memcg_shrinker(struct shrinker *shrinker) +{ + return 0; +} + +static void unregister_memcg_shrinker(struct shrinker *shrinker) +{ +} + static bool global_reclaim(struct scan_control *sc) { return true; @@ -594,7 +588,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, return freed; } -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { @@ -602,7 +596,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, unsigned long ret, freed = 0; int i; - if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)) + if (!mem_cgroup_online(memcg)) return 0; if (!down_read_trylock(&shrinker_rwsem)) @@ -628,6 +622,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, continue; } + /* Call non-slab shrinkers even though kmem is disabled */ + if (!memcg_kmem_enabled() && + !(shrinker->flags & SHRINKER_NONSLAB)) + continue; + ret = do_shrink_slab(&sc, shrinker, priority); if (ret == SHRINK_EMPTY) { clear_bit(i, map->map); @@ -664,13 +663,13 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, up_read(&shrinker_rwsem); return freed; } -#else /* CONFIG_MEMCG_KMEM */ +#else /* CONFIG_MEMCG */ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { return 0; } -#endif /* CONFIG_MEMCG_KMEM */ +#endif /* CONFIG_MEMCG */ /** * shrink_slab - shrink slab caches @@ -1152,7 +1151,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, VM_BUG_ON_PAGE(PageActive(page), page); - nr_pages = 1 << compound_order(page); + nr_pages = compound_nr(page); /* Account the number of base pages even though THP */ sc->nr_scanned += nr_pages; @@ -1490,10 +1489,9 @@ static unsigned long shrink_page_list(struct list_head *page_list, * Is there need to periodically free_page_list? It would * appear not as the counts should be low */ - if (unlikely(PageTransHuge(page))) { - mem_cgroup_uncharge(page); + if (unlikely(PageTransHuge(page))) (*get_compound_page_dtor(page))(page); - } else + else list_add(&page->lru, &free_pages); continue; @@ -1708,7 +1706,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, VM_BUG_ON_PAGE(!PageLRU(page), page); - nr_pages = 1 << compound_order(page); + nr_pages = compound_nr(page); total_scan += nr_pages; if (page_zonenum(page) > sc->reclaim_idx) { @@ -1914,7 +1912,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, if (unlikely(PageCompound(page))) { spin_unlock_irq(&pgdat->lru_lock); - mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); spin_lock_irq(&pgdat->lru_lock); } else @@ -2589,7 +2586,6 @@ static bool in_reclaim_compaction(struct scan_control *sc) */ static inline bool should_continue_reclaim(struct pglist_data *pgdat, unsigned long nr_reclaimed, - unsigned long nr_scanned, struct scan_control *sc) { unsigned long pages_for_compaction; @@ -2600,40 +2596,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, if (!in_reclaim_compaction(sc)) return false; - /* Consider stopping depending on scan and reclaim activity */ - if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) { - /* - * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the - * full LRU list has been scanned and we are still failing - * to reclaim pages. This full LRU scan is potentially - * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed - */ - if (!nr_reclaimed && !nr_scanned) - return false; - } else { - /* - * For non-__GFP_RETRY_MAYFAIL allocations which can presumably - * fail without consequence, stop if we failed to reclaim - * any pages from the last SWAP_CLUSTER_MAX number of - * pages that were scanned. This will return to the - * caller faster at the risk reclaim/compaction and - * the resulting allocation attempt fails - */ - if (!nr_reclaimed) - return false; - } - /* - * If we have not reclaimed enough pages for compaction and the - * inactive lists are large enough, continue reclaiming + * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX + * number of pages that were scanned. This will return to the caller + * with the risk reclaim/compaction and the resulting allocation attempt + * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL + * allocations through requiring that the full LRU list has been scanned + * first, by assuming that zero delta of sc->nr_scanned means full LRU + * scan, but that approximation was wrong, and there were corner cases + * where always a non-zero amount of pages were scanned. */ - pages_for_compaction = compact_gap(sc->order); - inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); - if (get_nr_swap_pages() > 0) - inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); - if (sc->nr_reclaimed < pages_for_compaction && - inactive_lru_pages > pages_for_compaction) - return true; + if (!nr_reclaimed) + return false; /* If compaction would go ahead or the allocation would succeed, stop */ for (z = 0; z <= sc->reclaim_idx; z++) { @@ -2650,7 +2624,17 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, ; } } - return true; + + /* + * If we have not reclaimed enough pages for compaction and the + * inactive lists are large enough, continue reclaiming + */ + pages_for_compaction = compact_gap(sc->order); + inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); + if (get_nr_swap_pages() > 0) + inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); + + return inactive_lru_pages > pages_for_compaction; } static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg) @@ -2667,10 +2651,6 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) do { struct mem_cgroup *root = sc->target_mem_cgroup; - struct mem_cgroup_reclaim_cookie reclaim = { - .pgdat = pgdat, - .priority = sc->priority, - }; unsigned long node_lru_pages = 0; struct mem_cgroup *memcg; @@ -2679,7 +2659,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - memcg = mem_cgroup_iter(root, NULL, &reclaim); + memcg = mem_cgroup_iter(root, NULL, NULL); do { unsigned long lru_pages; unsigned long reclaimed; @@ -2714,31 +2694,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) shrink_node_memcg(pgdat, memcg, sc, &lru_pages); node_lru_pages += lru_pages; - if (sc->may_shrinkslab) { - shrink_slab(sc->gfp_mask, pgdat->node_id, - memcg, sc->priority); - } + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, + sc->priority); /* Record the group's reclaim efficiency */ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned, sc->nr_reclaimed - reclaimed); - /* - * Kswapd have to scan all memory cgroups to fulfill - * the overall scan target for the node. - * - * Limit reclaim, on the other hand, only cares about - * nr_to_reclaim pages to be reclaimed and it will - * retry with decreasing priority if one round over the - * whole hierarchy is not sufficient. - */ - if (!current_is_kswapd() && - sc->nr_reclaimed >= sc->nr_to_reclaim) { - mem_cgroup_iter_break(root, memcg); - break; - } - } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim))); + } while ((memcg = mem_cgroup_iter(root, memcg, NULL))); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; @@ -2815,7 +2779,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) wait_iff_congested(BLK_RW_ASYNC, HZ/10); } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, - sc->nr_scanned - nr_scanned, sc)); + sc)); /* * Kswapd gives up on balancing particular nodes after too @@ -3194,7 +3158,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = 1, - .may_shrinkslab = 1, }; /* @@ -3226,6 +3189,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_MEMCG +/* Only used by soft limit reclaim. Do not reuse for anything else. */ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, @@ -3238,11 +3202,11 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, .may_unmap = 1, .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, - .may_shrinkslab = 1, }; unsigned long lru_pages; - set_task_reclaim_state(current, &sc.reclaim_state); + WARN_ON_ONCE(!current->reclaim_state); + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -3260,7 +3224,6 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); - set_task_reclaim_state(current, NULL); *nr_scanned = sc.nr_scanned; return sc.nr_reclaimed; @@ -3286,7 +3249,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = may_swap, - .may_shrinkslab = 1, }; set_task_reclaim_state(current, &sc.reclaim_state); @@ -3598,7 +3560,6 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) */ sc.may_writepage = !laptop_mode && !nr_boost_reclaim; sc.may_swap = !nr_boost_reclaim; - sc.may_shrinkslab = !nr_boost_reclaim; /* * Do some background aging of the anon list, to give