asedeno.scripts.mit.edu Git - linux.git/blobdiff - mm/memcontrol.c
mm, memcg: move memcg limit enforcement from zones to nodes
[linux.git] / mm / memcontrol.c
index c9ebec98e92a392417ce9e72d83e1766eea7108e..9cbd40ebccd1f77a4c81a97cd2253dd3ccdf2bbb 100644 (file)
@@ -132,15 +132,11 @@ static const char * const mem_cgroup_lru_names[] = {
  * their hierarchy representation
  */
 
-struct mem_cgroup_tree_per_zone {
+struct mem_cgroup_tree_per_node {
        struct rb_root rb_root;
        spinlock_t lock;
 };
 
-struct mem_cgroup_tree_per_node {
-       struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
 struct mem_cgroup_tree {
        struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
 };
@@ -374,37 +370,35 @@ ino_t page_cgroup_ino(struct page *page)
        return ino;
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
+static struct mem_cgroup_per_node *
+mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
 {
        int nid = page_to_nid(page);
-       int zid = page_zonenum(page);
 
-       return &memcg->nodeinfo[nid]->zoneinfo[zid];
+       return memcg->nodeinfo[nid];
 }
 
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
+static struct mem_cgroup_tree_per_node *
+soft_limit_tree_node(int nid)
 {
-       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+       return soft_limit_tree.rb_tree_per_node[nid];
 }
 
-static struct mem_cgroup_tree_per_zone *
+static struct mem_cgroup_tree_per_node *
 soft_limit_tree_from_page(struct page *page)
 {
        int nid = page_to_nid(page);
-       int zid = page_zonenum(page);
 
-       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+       return soft_limit_tree.rb_tree_per_node[nid];
 }
 
-static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
-                                        struct mem_cgroup_tree_per_zone *mctz,
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
+                                        struct mem_cgroup_tree_per_node *mctz,
                                         unsigned long new_usage_in_excess)
 {
        struct rb_node **p = &mctz->rb_root.rb_node;
        struct rb_node *parent = NULL;
-       struct mem_cgroup_per_zone *mz_node;
+       struct mem_cgroup_per_node *mz_node;
 
        if (mz->on_tree)
                return;
@@ -414,7 +408,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
                return;
        while (*p) {
                parent = *p;
-               mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+               mz_node = rb_entry(parent, struct mem_cgroup_per_node,
                                        tree_node);
                if (mz->usage_in_excess < mz_node->usage_in_excess)
                        p = &(*p)->rb_left;
@@ -430,8 +424,8 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
        mz->on_tree = true;
 }
 
-static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
-                                        struct mem_cgroup_tree_per_zone *mctz)
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+                                        struct mem_cgroup_tree_per_node *mctz)
 {
        if (!mz->on_tree)
                return;
@@ -439,8 +433,8 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
        mz->on_tree = false;
 }
 
-static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
-                                      struct mem_cgroup_tree_per_zone *mctz)
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+                                      struct mem_cgroup_tree_per_node *mctz)
 {
        unsigned long flags;
 
@@ -464,8 +458,8 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
 static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 {
        unsigned long excess;
-       struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup_tree_per_zone *mctz;
+       struct mem_cgroup_per_node *mz;
+       struct mem_cgroup_tree_per_node *mctz;
 
        mctz = soft_limit_tree_from_page(page);
        /*
@@ -473,7 +467,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
         * because their event counter is not touched.
         */
        for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-               mz = mem_cgroup_page_zoneinfo(memcg, page);
+               mz = mem_cgroup_page_nodeinfo(memcg, page);
                excess = soft_limit_excess(memcg);
                /*
                 * We have to update the tree if mz is on RB-tree or
@@ -498,24 +492,22 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 
 static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 {
-       struct mem_cgroup_tree_per_zone *mctz;
-       struct mem_cgroup_per_zone *mz;
-       int nid, zid;
+       struct mem_cgroup_tree_per_node *mctz;
+       struct mem_cgroup_per_node *mz;
+       int nid;
 
        for_each_node(nid) {
-               for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                       mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                       mctz = soft_limit_tree_node_zone(nid, zid);
-                       mem_cgroup_remove_exceeded(mz, mctz);
-               }
+               mz = mem_cgroup_nodeinfo(memcg, nid);
+               mctz = soft_limit_tree_node(nid);
+               mem_cgroup_remove_exceeded(mz, mctz);
        }
 }
 
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 {
        struct rb_node *rightmost = NULL;
-       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_per_node *mz;
 
 retry:
        mz = NULL;
@@ -523,7 +515,7 @@ __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
        if (!rightmost)
                goto done;              /* Nothing to reclaim from */
 
-       mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+       mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node);
        /*
         * Remove the node now but someone else can add it back,
         * we will to add it back at the end of reclaim to its correct
@@ -537,10 +529,10 @@ __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
        return mz;
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 {
-       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_per_node *mz;
 
        spin_lock_irq(&mctz->lock);
        mz = __mem_cgroup_largest_soft_limit_node(mctz);
@@ -634,20 +626,16 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                           int nid, unsigned int lru_mask)
 {
        unsigned long nr = 0;
-       int zid;
+       struct mem_cgroup_per_node *mz;
+       enum lru_list lru;
 
        VM_BUG_ON((unsigned)nid >= nr_node_ids);
 
-       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-               struct mem_cgroup_per_zone *mz;
-               enum lru_list lru;
-
-               for_each_lru(lru) {
-                       if (!(BIT(lru) & lru_mask))
-                               continue;
-                       mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                       nr += mz->lru_size[lru];
-               }
+       for_each_lru(lru) {
+               if (!(BIT(lru) & lru_mask))
+                       continue;
+               mz = mem_cgroup_nodeinfo(memcg, nid);
+               nr += mz->lru_size[lru];
        }
        return nr;
 }
@@ -800,9 +788,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
        rcu_read_lock();
 
        if (reclaim) {
-               struct mem_cgroup_per_zone *mz;
+               struct mem_cgroup_per_node *mz;
 
-               mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
+               mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
                iter = &mz->iter[reclaim->priority];
 
                if (prev && reclaim->generation != iter->generation)
@@ -901,19 +889,17 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 {
        struct mem_cgroup *memcg = dead_memcg;
        struct mem_cgroup_reclaim_iter *iter;
-       struct mem_cgroup_per_zone *mz;
-       int nid, zid;
+       struct mem_cgroup_per_node *mz;
+       int nid;
        int i;
 
        while ((memcg = parent_mem_cgroup(memcg))) {
                for_each_node(nid) {
-                       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                               mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                               for (i = 0; i <= DEF_PRIORITY; i++) {
-                                       iter = &mz->iter[i];
-                                       cmpxchg(&iter->position,
-                                               dead_memcg, NULL);
-                               }
+                       mz = mem_cgroup_nodeinfo(memcg, nid);
+                       for (i = 0; i <= DEF_PRIORITY; i++) {
+                               iter = &mz->iter[i];
+                               cmpxchg(&iter->position,
+                                       dead_memcg, NULL);
                        }
                }
        }
@@ -945,7 +931,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
  */
 struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
 {
-       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_per_node *mz;
        struct mem_cgroup *memcg;
        struct lruvec *lruvec;
 
@@ -962,7 +948,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
        if (!memcg)
                memcg = root_mem_cgroup;
 
-       mz = mem_cgroup_page_zoneinfo(memcg, page);
+       mz = mem_cgroup_page_nodeinfo(memcg, page);
        lruvec = &mz->lruvec;
 out:
        /*
@@ -989,7 +975,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
                                enum zone_type zid, int nr_pages)
 {
-       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_per_node *mz;
        unsigned long *lru_size;
        long size;
        bool empty;
@@ -999,7 +985,7 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
        if (mem_cgroup_disabled())
                return;
 
-       mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+       mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        lru_size = mz->lru_size + lru;
        empty = list_empty(lruvec->lists + lru);
 
@@ -1392,7 +1378,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 #endif
 
 static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-                                  struct zone *zone,
+                                  pg_data_t *pgdat,
                                   gfp_t gfp_mask,
                                   unsigned long *total_scanned)
 {
@@ -1402,7 +1388,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
        unsigned long excess;
        unsigned long nr_scanned;
        struct mem_cgroup_reclaim_cookie reclaim = {
-               .zone = zone,
+               .pgdat = pgdat,
                .priority = 0,
        };
 
@@ -1433,7 +1419,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                        continue;
                }
                total += mem_cgroup_shrink_node(victim, gfp_mask, false,
-                                       zone, &nr_scanned);
+                                       pgdat, &nr_scanned);
                *total_scanned += nr_scanned;
                if (!soft_limit_excess(root_memcg))
                        break;
@@ -2560,22 +2546,22 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
        return ret;
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                            gfp_t gfp_mask,
                                            unsigned long *total_scanned)
 {
        unsigned long nr_reclaimed = 0;
-       struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+       struct mem_cgroup_per_node *mz, *next_mz = NULL;
        unsigned long reclaimed;
        int loop = 0;
-       struct mem_cgroup_tree_per_zone *mctz;
+       struct mem_cgroup_tree_per_node *mctz;
        unsigned long excess;
        unsigned long nr_scanned;
 
        if (order > 0)
                return 0;
 
-       mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+       mctz = soft_limit_tree_node(pgdat->node_id);
        /*
         * This loop can run a while, specially if mem_cgroup's continuously
         * keep exceeding their soft limit and putting the system under
@@ -2590,7 +2576,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                        break;
 
                nr_scanned = 0;
-               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
                                                    gfp_mask, &nr_scanned);
                nr_reclaimed += reclaimed;
                *total_scanned += nr_scanned;
@@ -3211,22 +3197,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_DEBUG_VM
        {
-               int nid, zid;
-               struct mem_cgroup_per_zone *mz;
+               pg_data_t *pgdat;
+               struct mem_cgroup_per_node *mz;
                struct zone_reclaim_stat *rstat;
                unsigned long recent_rotated[2] = {0, 0};
                unsigned long recent_scanned[2] = {0, 0};
 
-               for_each_online_node(nid)
-                       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                               mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                               rstat = &mz->lruvec.reclaim_stat;
+               for_each_online_pgdat(pgdat) {
+                       mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+                       rstat = &mz->lruvec.reclaim_stat;
 
-                               recent_rotated[0] += rstat->recent_rotated[0];
-                               recent_rotated[1] += rstat->recent_rotated[1];
-                               recent_scanned[0] += rstat->recent_scanned[0];
-                               recent_scanned[1] += rstat->recent_scanned[1];
-                       }
+                       recent_rotated[0] += rstat->recent_rotated[0];
+                       recent_rotated[1] += rstat->recent_rotated[1];
+                       recent_scanned[0] += rstat->recent_scanned[0];
+                       recent_scanned[1] += rstat->recent_scanned[1];
+               }
                seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
                seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
                seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
@@ -4106,11 +4091,10 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
        return idr_find(&mem_cgroup_idr, id);
 }
 
-static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
        struct mem_cgroup_per_node *pn;
-       struct mem_cgroup_per_zone *mz;
-       int zone, tmp = node;
+       int tmp = node;
        /*
         * This routine is called against possible nodes.
         * But it's BUG to call kmalloc() against offline node.
@@ -4125,18 +4109,16 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
        if (!pn)
                return 1;
 
-       for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-               mz = &pn->zoneinfo[zone];
-               lruvec_init(&mz->lruvec);
-               mz->usage_in_excess = 0;
-               mz->on_tree = false;
-               mz->memcg = memcg;
-       }
+       lruvec_init(&pn->lruvec);
+       pn->usage_in_excess = 0;
+       pn->on_tree = false;
+       pn->memcg = memcg;
+
        memcg->nodeinfo[node] = pn;
        return 0;
 }
 
-static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
        kfree(memcg->nodeinfo[node]);
 }
@@ -4147,7 +4129,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 
        memcg_wb_domain_exit(memcg);
        for_each_node(node)
-               free_mem_cgroup_per_zone_info(memcg, node);
+               free_mem_cgroup_per_node_info(memcg, node);
        free_percpu(memcg->stat);
        kfree(memcg);
 }
@@ -4176,7 +4158,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
                goto fail;
 
        for_each_node(node)
-               if (alloc_mem_cgroup_per_zone_info(memcg, node))
+               if (alloc_mem_cgroup_per_node_info(memcg, node))
                        goto fail;
 
        if (memcg_wb_domain_init(memcg, GFP_KERNEL))
@@ -5779,18 +5761,12 @@ static int __init mem_cgroup_init(void)
 
        for_each_node(node) {
                struct mem_cgroup_tree_per_node *rtpn;
-               int zone;
 
                rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
                                    node_online(node) ? node : NUMA_NO_NODE);
 
-               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                       struct mem_cgroup_tree_per_zone *rtpz;
-
-                       rtpz = &rtpn->rb_tree_per_zone[zone];
-                       rtpz->rb_root = RB_ROOT;
-                       spin_lock_init(&rtpz->lock);
-               }
+               rtpn->rb_root = RB_ROOT;
+               spin_lock_init(&rtpn->lock);
                soft_limit_tree.rb_tree_per_node[node] = rtpn;
        }