]> asedeno.scripts.mit.edu Git - linux.git/blob - kernel/cgroup/freezer.c
cgroup: cgroup v2 freezer
[linux.git] / kernel / cgroup / freezer.c
1 //SPDX-License-Identifier: GPL-2.0
2 #include <linux/cgroup.h>
3 #include <linux/sched.h>
4 #include <linux/sched/task.h>
5 #include <linux/sched/signal.h>
6
7 #include "cgroup-internal.h"
8
9 /*
10  * Propagate the cgroup frozen state upwards by the cgroup tree.
11  */
12 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
13 {
14         int desc = 1;
15
16         /*
17          * If the new state is frozen, some freezing ancestor cgroups may change
18          * their state too, depending on if all their descendants are frozen.
19          *
20          * Otherwise, all ancestor cgroups are forced into the non-frozen state.
21          */
22         while ((cgrp = cgroup_parent(cgrp))) {
23                 if (frozen) {
24                         cgrp->freezer.nr_frozen_descendants += desc;
25                         if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
26                             test_bit(CGRP_FREEZE, &cgrp->flags) &&
27                             cgrp->freezer.nr_frozen_descendants ==
28                             cgrp->nr_descendants) {
29                                 set_bit(CGRP_FROZEN, &cgrp->flags);
30                                 cgroup_file_notify(&cgrp->events_file);
31                                 desc++;
32                         }
33                 } else {
34                         cgrp->freezer.nr_frozen_descendants -= desc;
35                         if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
36                                 clear_bit(CGRP_FROZEN, &cgrp->flags);
37                                 cgroup_file_notify(&cgrp->events_file);
38                                 desc++;
39                         }
40                 }
41         }
42 }
43
44 /*
45  * Revisit the cgroup frozen state.
46  * Checks if the cgroup is really frozen and perform all state transitions.
47  */
48 void cgroup_update_frozen(struct cgroup *cgrp)
49 {
50         bool frozen;
51
52         lockdep_assert_held(&css_set_lock);
53
54         /*
55          * If the cgroup has to be frozen (CGRP_FREEZE bit set),
56          * and all tasks are frozen and/or stopped, let's consider
57          * the cgroup frozen. Otherwise it's not frozen.
58          */
59         frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
60                 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
61
62         if (frozen) {
63                 /* Already there? */
64                 if (test_bit(CGRP_FROZEN, &cgrp->flags))
65                         return;
66
67                 set_bit(CGRP_FROZEN, &cgrp->flags);
68         } else {
69                 /* Already there? */
70                 if (!test_bit(CGRP_FROZEN, &cgrp->flags))
71                         return;
72
73                 clear_bit(CGRP_FROZEN, &cgrp->flags);
74         }
75         cgroup_file_notify(&cgrp->events_file);
76
77         /* Update the state of ancestor cgroups. */
78         cgroup_propagate_frozen(cgrp, frozen);
79 }
80
81 /*
82  * Increment cgroup's nr_frozen_tasks.
83  */
84 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
85 {
86         cgrp->freezer.nr_frozen_tasks++;
87 }
88
89 /*
90  * Decrement cgroup's nr_frozen_tasks.
91  */
92 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
93 {
94         cgrp->freezer.nr_frozen_tasks--;
95         WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
96 }
97
98 /*
99  * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
100  * and revisit the state of the cgroup, if necessary.
101  */
102 void cgroup_enter_frozen(void)
103 {
104         struct cgroup *cgrp;
105
106         if (current->frozen)
107                 return;
108
109         spin_lock_irq(&css_set_lock);
110         current->frozen = true;
111         cgrp = task_dfl_cgroup(current);
112         cgroup_inc_frozen_cnt(cgrp);
113         cgroup_update_frozen(cgrp);
114         spin_unlock_irq(&css_set_lock);
115 }
116
117 /*
118  * Conditionally leave frozen/stopped state. Update cgroup's counters,
119  * and revisit the state of the cgroup, if necessary.
120  *
121  * If always_leave is not set, and the cgroup is freezing,
122  * we're racing with the cgroup freezing. In this case, we don't
123  * drop the frozen counter to avoid a transient switch to
124  * the unfrozen state.
125  */
126 void cgroup_leave_frozen(bool always_leave)
127 {
128         struct cgroup *cgrp;
129
130         spin_lock_irq(&css_set_lock);
131         cgrp = task_dfl_cgroup(current);
132         if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
133                 cgroup_dec_frozen_cnt(cgrp);
134                 cgroup_update_frozen(cgrp);
135                 WARN_ON_ONCE(!current->frozen);
136                 current->frozen = false;
137         }
138         spin_unlock_irq(&css_set_lock);
139
140         if (unlikely(current->frozen)) {
141                 /*
142                  * If the task remained in the frozen state,
143                  * make sure it won't reach userspace without
144                  * entering the signal handling loop.
145                  */
146                 spin_lock_irq(&current->sighand->siglock);
147                 recalc_sigpending();
148                 spin_unlock_irq(&current->sighand->siglock);
149         }
150 }
151
152 /*
153  * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
154  * jobctl bit.
155  */
156 static void cgroup_freeze_task(struct task_struct *task, bool freeze)
157 {
158         unsigned long flags;
159
160         /* If the task is about to die, don't bother with freezing it. */
161         if (!lock_task_sighand(task, &flags))
162                 return;
163
164         if (freeze) {
165                 task->jobctl |= JOBCTL_TRAP_FREEZE;
166                 signal_wake_up(task, false);
167         } else {
168                 task->jobctl &= ~JOBCTL_TRAP_FREEZE;
169                 wake_up_process(task);
170         }
171
172         unlock_task_sighand(task, &flags);
173 }
174
175 /*
176  * Freeze or unfreeze all tasks in the given cgroup.
177  */
178 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
179 {
180         struct css_task_iter it;
181         struct task_struct *task;
182
183         lockdep_assert_held(&cgroup_mutex);
184
185         spin_lock_irq(&css_set_lock);
186         if (freeze)
187                 set_bit(CGRP_FREEZE, &cgrp->flags);
188         else
189                 clear_bit(CGRP_FREEZE, &cgrp->flags);
190         spin_unlock_irq(&css_set_lock);
191
192         css_task_iter_start(&cgrp->self, 0, &it);
193         while ((task = css_task_iter_next(&it))) {
194                 /*
195                  * Ignore kernel threads here. Freezing cgroups containing
196                  * kthreads isn't supported.
197                  */
198                 if (task->flags & PF_KTHREAD)
199                         continue;
200                 cgroup_freeze_task(task, freeze);
201         }
202         css_task_iter_end(&it);
203
204         /*
205          * Cgroup state should be revisited here to cover empty leaf cgroups
206          * and cgroups which descendants are already in the desired state.
207          */
208         spin_lock_irq(&css_set_lock);
209         if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
210                 cgroup_update_frozen(cgrp);
211         spin_unlock_irq(&css_set_lock);
212 }
213
214 /*
215  * Adjust the task state (freeze or unfreeze) and revisit the state of
216  * source and destination cgroups.
217  */
218 void cgroup_freezer_migrate_task(struct task_struct *task,
219                                  struct cgroup *src, struct cgroup *dst)
220 {
221         lockdep_assert_held(&css_set_lock);
222
223         /*
224          * Kernel threads are not supposed to be frozen at all.
225          */
226         if (task->flags & PF_KTHREAD)
227                 return;
228
229         /*
230          * Adjust counters of freezing and frozen tasks.
231          * Note, that if the task is frozen, but the destination cgroup is not
232          * frozen, we bump both counters to keep them balanced.
233          */
234         if (task->frozen) {
235                 cgroup_inc_frozen_cnt(dst);
236                 cgroup_dec_frozen_cnt(src);
237         }
238         cgroup_update_frozen(dst);
239         cgroup_update_frozen(src);
240
241         /*
242          * Force the task to the desired state.
243          */
244         cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
245 }
246
247 void cgroup_freezer_frozen_exit(struct task_struct *task)
248 {
249         struct cgroup *cgrp = task_dfl_cgroup(task);
250
251         lockdep_assert_held(&css_set_lock);
252
253         cgroup_dec_frozen_cnt(cgrp);
254         cgroup_update_frozen(cgrp);
255 }
256
257 void cgroup_freeze(struct cgroup *cgrp, bool freeze)
258 {
259         struct cgroup_subsys_state *css;
260         struct cgroup *dsct;
261         bool applied = false;
262
263         lockdep_assert_held(&cgroup_mutex);
264
265         /*
266          * Nothing changed? Just exit.
267          */
268         if (cgrp->freezer.freeze == freeze)
269                 return;
270
271         cgrp->freezer.freeze = freeze;
272
273         /*
274          * Propagate changes downwards the cgroup tree.
275          */
276         css_for_each_descendant_pre(css, &cgrp->self) {
277                 dsct = css->cgroup;
278
279                 if (cgroup_is_dead(dsct))
280                         continue;
281
282                 if (freeze) {
283                         dsct->freezer.e_freeze++;
284                         /*
285                          * Already frozen because of ancestor's settings?
286                          */
287                         if (dsct->freezer.e_freeze > 1)
288                                 continue;
289                 } else {
290                         dsct->freezer.e_freeze--;
291                         /*
292                          * Still frozen because of ancestor's settings?
293                          */
294                         if (dsct->freezer.e_freeze > 0)
295                                 continue;
296
297                         WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
298                 }
299
300                 /*
301                  * Do change actual state: freeze or unfreeze.
302                  */
303                 cgroup_do_freeze(dsct, freeze);
304                 applied = true;
305         }
306
307         /*
308          * Even if the actual state hasn't changed, let's notify a user.
309          * The state can be enforced by an ancestor cgroup: the cgroup
310          * can already be in the desired state or it can be locked in the
311          * opposite state, so that the transition will never happen.
312          * In both cases it's better to notify a user, that there is
313          * nothing to wait for.
314          */
315         if (!applied)
316                 cgroup_file_notify(&cgrp->events_file);
317 }