mm/list_lru.c
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        /*
         * This needs node 0 to always be present, even on
         * systems supporting sparse NUMA ids.
         */
        return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * Either lock or RCU protects the array of per cgroup lists
         * from relocation (see memcg_update_list_lru_node).
         */
        memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
                                           lockdep_is_held(&nlru->lock));
        if (memcg_lrus && idx >= 0)
                return memcg_lrus->lru[idx];
        return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
        struct page *page;

        if (!memcg_kmem_enabled())
                return NULL;
        page = virt_to_head_page(ptr);
        return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        struct list_lru_one *l = &nlru->lru;
        struct mem_cgroup *memcg = NULL;

        if (!nlru->memcg_lrus)
                goto out;

        memcg = mem_cgroup_from_kmem(ptr);
        if (!memcg)
                goto out;

        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
        if (memcg_ptr)
                *memcg_ptr = memcg;
        return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        if (memcg_ptr)
                *memcg_ptr = NULL;
        return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

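/**
 * list_lru_add - add an element to the lru list's tail
 * @lru: the lru pointer
 * @item: the item to be added
 *
 * If the element is already part of a list, this function returns without
 * doing anything, so callers may lazily update list membership without
 * tracking state themselves.
 *
 * Return: true if @item was added to @lru, false if it was already on a list.
 */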
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct mem_cgroup *memcg;
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, &memcg);
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        memcg_set_shrinker_bit(memcg, nid,
                                               lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

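/**
 * list_lru_del - delete an element from the lru list
 * @lru: the lru pointer
 * @item: the item to be deleted
 *
 * Return: true if @item was removed from a list, false if it was not on
 * any list to begin with.
 */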
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, NULL);
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

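/*
 * list_lru_isolate - remove @item from @list during a walk callback.
 * The caller must already hold the node's lru lock; only the per-list
 * item count is updated here, the per-node count is handled by the walker.
 */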
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

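/*
 * list_lru_isolate_move - move @item from @list onto the caller-provided
 * @head during a walk callback. As with list_lru_isolate(), the node's lru
 * lock must be held and only the per-list item count is adjusted here.
 */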
void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

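/**
 * list_lru_count_one - count the objects on one node/memcg pair of @lru
 * @lru: the lru pointer
 * @nid: the node id to count from
 * @memcg: the cgroup to count from
 *
 * The count is approximate: only RCU protection is taken, so items may be
 * added or removed concurrently.
 */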
unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        unsigned long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
        count = l->nr_items;
        rcu_read_unlock();

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

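/**
 * list_lru_count_node - count all objects on one node of @lru
 * @lru: the lru pointer
 * @nid: the node id to count from
 *
 * Counts objects from all memcgs on that node, using the cached per-node
 * total rather than walking the lists.
 */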
unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

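/*
 * Walk one per-node, per-memcg list (memcg_idx < 0 means the node's global
 * list), calling @isolate for each item until @nr_to_walk items have been
 * scanned. Returns the number of items the callback reported as removed.
 */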
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

        spin_lock(&nlru->lock);
        l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * Decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items.
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        /* fall through */
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }

        spin_unlock(&nlru->lock);
        return isolated;
}

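/**
 * list_lru_walk_one - walk the objects of one node/memcg pair of @lru
 * @lru: the lru pointer
 * @nid: the node id to scan from
 * @memcg: the cgroup to scan from
 * @isolate: callback invoked for each object, with the node's lru lock held
 * @cb_arg: opaque argument passed to @isolate
 * @nr_to_walk: how many objects to scan, decremented as the walk proceeds
 *
 * Return: the number of objects the callback flagged as removed.
 */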
unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
                                   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

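/**
 * list_lru_walk_node - walk all objects on one node of @lru
 * @lru: the lru pointer
 * @nid: the node id to scan from
 * @isolate: callback invoked for each object, with the node's lru lock held
 * @cb_arg: opaque argument passed to @isolate
 * @nr_to_walk: how many objects to scan in total, across all lists
 *
 * Walks the node's global list first, then every per-memcg list if the lru
 * is memcg aware, until @nr_to_walk objects have been scanned.
 */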
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
                                        nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        isolated += __list_lru_walk_one(lru, nid, memcg_idx,
                                                isolate, cb_arg, nr_to_walk);
                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                          int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                      int begin, int end)
{
        int i;

        for (i = begin; i < end; i++) {
                struct list_lru_one *l;

                l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
                if (!l)
                        goto fail;

                init_one_lru(l);
                memcg_lrus->lru[i] = l;
        }
        return 0;
fail:
        /* Only entries in [begin, i) were allocated successfully; free those. */
        __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
        return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        int size = memcg_nr_cache_ids;

        memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
                              size * sizeof(void *), GFP_KERNEL);
        if (!memcg_lrus)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
                kvfree(memcg_lrus);
                return -ENOMEM;
        }
        RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

        return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when the shrinker has already been unregistered,
         * so nobody can be using the lru anymore. There is no need to use
         * kvfree_rcu() here.
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
        struct list_lru_memcg *mlru;

        mlru = container_of(head, struct list_lru_memcg, rcu);
        kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = rcu_dereference_protected(nlru->memcg_lrus,
                                        lockdep_is_held(&list_lrus_mutex));
        new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(new, old_size, new_size)) {
                kvfree(new);
                return -ENOMEM;
        }

        memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

        /*
         * The locking below allows readers that hold nlru->lock to avoid
         * taking rcu_read_lock (see list_lru_from_memcg_idx).
         *
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);

        call_rcu(&old->rcu, kvfree_rcu);
        return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
                                              int old_size, int new_size)
{
        struct list_lru_memcg *memcg_lrus;

        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
                                               lockdep_is_held(&list_lrus_mutex));
        /*
         * Do not bother shrinking the array back to the old size, because we
         * cannot handle allocation failures here.
         */
        __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        int i;

        if (!memcg_aware)
                return 0;

        for_each_node(i) {
                if (memcg_init_list_lru_node(&lru->node[i]))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;
                memcg_destroy_list_lru_node(&lru->node[i]);
        }
        return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
                                 int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return 0;

        for_each_node(i) {
                if (memcg_update_list_lru_node(&lru->node[i],
                                               old_size, new_size))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;

                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
        }
        return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
                                         int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
}

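/*
 * Resize the per-memcg arrays of every registered memcg-aware lru to
 * @new_size entries. On failure, the new per-memcg lists that were already
 * set up are torn down again (the arrays themselves are not shrunk back),
 * so that either all lrus or none of them see the new size.
 */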
int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        goto fail;
        }
out:
        mutex_unlock(&list_lrus_mutex);
        return ret;
fail:
        list_for_each_entry_continue_reverse(lru, &list_lrus, list)
                memcg_cancel_update_list_lru(lru, old_size, new_size);
        goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
                                      int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;
        bool set;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(nlru, src_idx);
        dst = list_lru_from_memcg_idx(nlru, dst_idx);

        list_splice_init(&src->list, &dst->list);
        set = (!dst->nr_items && src->nr_items);
        dst->nr_items += src->nr_items;
        if (set)
                memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        src->nr_items = 0;

        spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
                                 int src_idx, struct mem_cgroup *dst_memcg)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

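/*
 * Splice the lists of the memcg identified by @src_idx into the lists of
 * @dst_memcg on every node of every registered lru; typically used when a
 * kmem cgroup goes offline and its objects are reparented.
 */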
void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru *lru;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list)
                memcg_drain_list_lru(lru, src_idx, dst_memcg);
        mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

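/**
 * __list_lru_init - initialize a list_lru structure
 * @lru: the lru to initialize
 * @memcg_aware: whether per-memcg lists should be allocated
 * @key: optional lockdep class for the per-node locks
 * @shrinker: optional shrinker whose id is recorded for shrinker-bit updates
 *
 * Return: 0 on success, -ENOMEM if the per-node or per-memcg arrays could
 * not be allocated. Callers normally use the list_lru_init*() wrappers from
 * <linux/list_lru.h> rather than calling this directly.
 */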
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;
        size_t size = sizeof(*lru->node) * nr_node_ids;
        int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
#endif
        memcg_get_cache_ids();

        lru->node = kzalloc(size, GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                /* Do this so a list_lru_destroy() doesn't crash: */
                lru->node = NULL;
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

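/**
 * list_lru_destroy - tear down a list_lru structure
 * @lru: the lru to destroy
 *
 * Safe to call on an lru that was never initialized or has already been
 * destroyed; in that case it returns without doing anything.
 */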
void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
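
/*
 * Illustrative usage sketch (not part of this file): a hypothetical cache
 * that tracks idle objects on a list_lru and reclaims them via the walk
 * API. The names my_lru, my_object and my_isolate are made up for
 * illustration only.
 *
 *	static struct list_lru my_lru;
 *
 *	struct my_object {
 *		struct list_head lru;
 *		// ... cache-specific fields ...
 *	};
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *list,
 *					  spinlock_t *lock, void *arg)
 *	{
 *		// Called with the node's lru lock held.
 *		list_lru_isolate(list, item);
 *		// Defer freeing the object until after the lock is dropped.
 *		return LRU_REMOVED;
 *	}
 *
 *	// At init time:                  list_lru_init(&my_lru);
 *	// When an object becomes idle:   list_lru_add(&my_lru, &obj->lru);
 *	// When it is reused:             list_lru_del(&my_lru, &obj->lru);
 *	// From a shrinker scan callback: list_lru_walk_node(&my_lru, nid,
 *	//                                        my_isolate, NULL, &nr_to_scan);
 */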