mm/page_counter.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

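/*
 * propagate_low_usage - propagate protected usage up the hierarchy
 * @c: child page counter
 * @usage: current usage of @c, in pages
 *
 * Recomputes how many of @c's pages currently fall under its memory.low
 * protection and folds the delta into the parent's children_low_usage,
 * so ancestors can see how much protected memory their subtree is
 * actually using.
 */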
static void propagate_low_usage(struct page_counter *c, unsigned long usage)
{
        unsigned long low_usage, old;
        long delta;

        if (!c->parent)
                return;

        if (!c->low && !atomic_long_read(&c->low_usage))
                return;

        if (usage <= c->low)
                low_usage = usage;
        else
                low_usage = 0;

        old = atomic_long_xchg(&c->low_usage, low_usage);
        delta = low_usage - old;
        if (delta)
                atomic_long_add(delta, &c->parent->children_low_usage);
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
        long new;

        new = atomic_long_sub_return(nr_pages, &counter->usage);
        propagate_low_usage(counter, new);
        /* More uncharges than charges? */
        WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;

                new = atomic_long_add_return(nr_pages, &c->usage);
                propagate_low_usage(c, new);
                /*
                 * This is indeed racy, but we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
}

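/*
 * Illustrative sketch, not part of the original file: building a
 * two-level hierarchy with page_counter_init() from
 * <linux/page_counter.h> and charging the child.  The charge is
 * accounted in the child and every ancestor; limits are not checked.
 * The function and variable names here are hypothetical.
 */
static void __maybe_unused example_hierarchical_charge(void)
{
        static struct page_counter example_parent, example_child;

        page_counter_init(&example_parent, NULL);
        page_counter_init(&example_child, &example_parent);

        page_counter_charge(&example_child, 1);    /* child and parent usage go up */
        page_counter_uncharge(&example_child, 1);  /* and back down again */
}
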
/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points at the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                             unsigned long nr_pages,
                             struct page_counter **fail)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;
                /*
                 * Charge speculatively to avoid an expensive CAS.  If
                 * a bigger charge fails, it might falsely lock out a
                 * racing smaller charge and send it into reclaim
                 * early, but the error is limited to the difference
                 * between the two sizes, which is less than 2M/4M in
                 * case of a THP locking out a regular page charge.
                 *
                 * The atomic_long_add_return() implies a full memory
                 * barrier between incrementing the count and reading
                 * the limit.  When racing with page_counter_set_max(),
                 * we either see the new limit or the setter sees the
                 * counter has changed and retries.
                 */
                new = atomic_long_add_return(nr_pages, &c->usage);
                if (new > c->max) {
                        atomic_long_sub(nr_pages, &c->usage);
                        propagate_low_usage(c, new);
                        /*
                         * This is racy, but we can live with some
                         * inaccuracy in the failcnt.
                         */
                        c->failcnt++;
                        *fail = c;
                        goto failed;
                }
                propagate_low_usage(c, new);
                /*
                 * Just like with failcnt, we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
        return true;

failed:
        for (c = counter; c != *fail; c = c->parent)
                page_counter_cancel(c, nr_pages);

        return false;
}

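/*
 * Illustrative sketch, not part of the original file: the usual
 * try-charge/uncharge pattern.  On failure, @fail points at the
 * counter in the hierarchy that hit its limit, which a caller can use
 * to decide where to reclaim.  The function name is hypothetical.
 */
static bool __maybe_unused example_try_charge(struct page_counter *counter,
                                              unsigned long nr_pages)
{
        struct page_counter *fail;

        if (!page_counter_try_charge(counter, nr_pages, &fail))
                return false;   /* @fail or one of its ancestors is at its limit */

        /* ... the pages are accounted here ... */

        page_counter_uncharge(counter, nr_pages);
        return true;
}
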
/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent)
                page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
        for (;;) {
                unsigned long old;
                long usage;

                /*
                 * Update the limit while making sure that it's not
                 * below the concurrently-changing counter value.
                 *
                 * The xchg implies two full memory barriers before
                 * and after, so the read-swap-read is ordered and
                 * ensures coherency with page_counter_try_charge():
                 * that function modifies the count before checking
                 * the limit, so if it sees the old limit, we see the
                 * modified counter and retry.
                 */
                usage = atomic_long_read(&counter->usage);

                if (usage > nr_pages)
                        return -EBUSY;

                old = xchg(&counter->max, nr_pages);

                if (atomic_long_read(&counter->usage) <= usage)
                        return 0;

                counter->max = old;
                cond_resched();
        }
}

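/*
 * Illustrative sketch, not part of the original file: shrinking a
 * limit and handling the -EBUSY case.  A caller such as the memory
 * controller typically reclaims from the group and retries; here we
 * only report the condition.  The function name is hypothetical.
 */
static int __maybe_unused example_shrink_limit(struct page_counter *counter,
                                               unsigned long new_max)
{
        int err;

        err = page_counter_set_max(counter, new_max);
        if (err)        /* -EBUSY: usage is already above the requested limit */
                pr_debug("limit below current usage, reclaim and retry\n");

        return err;
}
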
/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->low = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_low_usage(c, atomic_long_read(&c->usage));
}

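/*
 * Illustrative sketch, not part of the original file: after a child's
 * memory.low protection is set, the pages it currently uses within
 * that protection show up in the parent's children_low_usage, which
 * the hierarchical memory.low logic consults alongside the parent's
 * own protection.  The function name is hypothetical.
 */
static void __maybe_unused example_set_protection(struct page_counter *parent,
                                                  struct page_counter *child,
                                                  unsigned long protected_pages)
{
        page_counter_set_low(child, protected_pages);

        pr_debug("protected usage below parent: %ld\n",
                 atomic_long_read(&parent->children_low_usage));
}
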
/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
                          unsigned long *nr_pages)
{
        char *end;
        u64 bytes;

        if (!strcmp(buf, max)) {
                *nr_pages = PAGE_COUNTER_MAX;
                return 0;
        }

        bytes = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;

        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

        return 0;
}

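/*
 * Illustrative sketch, not part of the original file: parsing a
 * cgroup-style limit string such as "512M" or "max" and applying it.
 * The function name is hypothetical; note that @buf must have any
 * trailing newline stripped, or the *end check above returns -EINVAL.
 */
static int __maybe_unused example_parse_and_set_limit(struct page_counter *counter,
                                                      const char *buf)
{
        unsigned long nr_pages;
        int err;

        err = page_counter_memparse(buf, "max", &nr_pages);
        if (err)
                return err;     /* not "max" and not a number with an optional K/M/G suffix */

        return page_counter_set_max(counter, nr_pages);
}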