Merge branch 'cgroup-auto-detach'

author Alexei Starovoitov <ast@kernel.org>

Tue, 28 May 2019 16:30:03 +0000 (09:30 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Tue, 28 May 2019 16:30:03 +0000 (09:30 -0700)
author Alexei Starovoitov <ast@kernel.org>
Tue, 28 May 2019 16:30:03 +0000 (09:30 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 28 May 2019 16:30:03 +0000 (09:30 -0700)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h

index cb3c6b3b89c802941edea7f732e2009b00a4b356..9f100fc422c33fd629600895524afd399f610ee1 100644 (file)
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
  #include <linux/errno.h>
  #include <linux/jump_label.h>
  #include <linux/percpu.h>
+#include <linux/percpu-refcount.h>
  #include <linux/rbtree.h>
  #include <uapi/linux/bpf.h>
  
@@ -72,10 +73,16 @@ struct cgroup_bpf {
  
         /* temp storage for effective prog array used by prog_attach/detach */
         struct bpf_prog_array __rcu *inactive;
+
+       /* reference counter used to detach bpf programs after cgroup removal */
+       struct percpu_ref refcnt;
+
+       /* cgroup_bpf is released using a work queue */
+       struct work_struct release_work;
  };
  
-void cgroup_bpf_put(struct cgroup *cgrp);
  int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_offline(struct cgroup *cgrp);
  
  int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
                         enum bpf_attach_type type, u32 flags);
@@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  
  struct bpf_prog;
  struct cgroup_bpf {};
-static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
  static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
  
  static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
                                          enum bpf_prog_type ptype,
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index c0077adeea8334dc136233de439351ca3e742eff..49e8facf7c4a54e240042cbfbc3fecf4b41b6432 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
  
  #endif /* !CONFIG_CGROUPS */
  
+#ifdef CONFIG_CGROUP_BPF
+static inline void cgroup_bpf_get(struct cgroup *cgrp)
+{
+       percpu_ref_get(&cgrp->bpf.refcnt);
+}
+
+static inline void cgroup_bpf_put(struct cgroup *cgrp)
+{
+       percpu_ref_put(&cgrp->bpf.refcnt);
+}
+
+#else /* CONFIG_CGROUP_BPF */
+
+static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+
+#endif /* CONFIG_CGROUP_BPF */
+
  #endif /* _LINUX_CGROUP_H */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index fcde0f7b25854882c42e76f901406cbf4a1718a9..d995edbe816dd6ea0ab170d19f6528365b1a72ba 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,12 +22,21 @@
  DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
  EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  
+void cgroup_bpf_offline(struct cgroup *cgrp)
+{
+       cgroup_get(cgrp);
+       percpu_ref_kill(&cgrp->bpf.refcnt);
+}
+
  /**
- * cgroup_bpf_put() - put references of all bpf programs
- * @cgrp: the cgroup to modify
+ * cgroup_bpf_release() - put references of all bpf programs and
+ *                        release all cgroup bpf data
+ * @work: work structure embedded into the cgroup to modify
   */
-void cgroup_bpf_put(struct cgroup *cgrp)
+static void cgroup_bpf_release(struct work_struct *work)
  {
+       struct cgroup *cgrp = container_of(work, struct cgroup,
+                                          bpf.release_work);
         enum bpf_cgroup_storage_type stype;
         unsigned int type;
  
@@ -47,6 +56,22 @@ void cgroup_bpf_put(struct cgroup *cgrp)
                 }
                 bpf_prog_array_free(cgrp->bpf.effective[type]);
         }
+
+       percpu_ref_exit(&cgrp->bpf.refcnt);
+       cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_bpf_release_fn() - callback used to schedule releasing
+ *                           of bpf cgroup data
+ * @ref: percpu ref counter structure
+ */
+static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+{
+       struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+       INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+       queue_work(system_wq, &cgrp->bpf.release_work);
  }
  
  /* count number of elements in the list.
@@ -167,7 +192,12 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
   */
  #define        NR ARRAY_SIZE(cgrp->bpf.effective)
         struct bpf_prog_array __rcu *arrays[NR] = {};
-       int i;
+       int ret, i;
+
+       ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
+                             GFP_KERNEL);
+       if (ret)
+               return ret;
  
         for (i = 0; i < NR; i++)
                 INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -183,6 +213,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
  cleanup:
         for (i = 0; i < NR; i++)
                 bpf_prog_array_free(arrays[i]);
+
+       percpu_ref_exit(&cgrp->bpf.refcnt);
+
         return -ENOMEM;
  }
  
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index 217cec4e22c68c6053b0e8406b670affd64963dc..ef9cfbfc82a954d6688fb61fd47dd90769e0876e 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work)
                 if (cgrp->kn)
                         RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
                                          NULL);
-
-               cgroup_bpf_put(cgrp);
         }
  
         mutex_unlock(&cgroup_mutex);
@@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
  
         cgroup1_check_for_release(parent);
  
+       cgroup_bpf_offline(cgrp);
+
         /* put the base reference */
         percpu_ref_kill(&cgrp->self.refcnt);
  
@@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
                  * Don't use cgroup_get_live().
                  */
                 cgroup_get(sock_cgroup_ptr(skcd));
+               cgroup_bpf_get(sock_cgroup_ptr(skcd));
                 return;
         }
  
@@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
                 cset = task_css_set(current);
                 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
                         skcd->val = (unsigned long)cset->dfl_cgrp;
+                       cgroup_bpf_get(cset->dfl_cgrp);
                         break;
                 }
                 cpu_relax();
@@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
  
  void cgroup_sk_free(struct sock_cgroup_data *skcd)
  {
-       cgroup_put(sock_cgroup_ptr(skcd));
+       struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+       cgroup_bpf_put(cgrp);
+       cgroup_put(cgrp);
  }
  
  #endif /* CONFIG_SOCK_CGROUP_DATA */
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile

index 4f0a1cdbfe7c2fdc082e69ec5da733e1b7ef86c5..253e5a2856be69b0657c73944102f09e5171151a 100644 (file)
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
  hostprogs-y += test_overhead
  hostprogs-y += test_cgrp2_array_pin
  hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_attach2
  hostprogs-y += test_cgrp2_sock
  hostprogs-y += test_cgrp2_sock2
  hostprogs-y += xdp1
@@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
  test_overhead-objs := bpf_load.o test_overhead_user.o
  test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
  test_cgrp2_attach-objs := test_cgrp2_attach.o
-test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
  test_cgrp2_sock-objs := test_cgrp2_sock.o
  test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
  xdp1-objs := xdp1_user.o
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore

index b3da2ffdc1581ac91b9f4da5626c99282188af7b..b2a9902f11c5f8a90f6f09b718316a8991da4dab 100644 (file)
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -22,6 +22,7 @@ test_lirc_mode2_user
  get_cgroup_id_user
  test_skb_cgroup_id_user
  test_socket_cookie
+test_cgroup_attach
  test_cgroup_storage
  test_select_reuseport
  test_flow_dissector
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile

index fa002da36d0d07167f5d90efe141f3f635cc3b9a..9b21391c49662fe4376d94df174c42d9127d845a 100644 (file)
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
         test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
         test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
         test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-       test_btf_dump
+       test_btf_dump test_cgroup_attach
  
  BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
  TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -99,6 +99,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
  $(OUTPUT)/test_netcnt: cgroup_helpers.c
  $(OUTPUT)/test_sock_fields: cgroup_helpers.c
  $(OUTPUT)/test_sysctl: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
  
  .PHONY: force
  
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c

index 6692a40a6979eac0b48665e77566e6beef066d17..0d89f0396be4c5690cb1ccbaf1564e3179ec2a8b 100644 (file)
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -33,6 +33,60 @@
         snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
                  CGROUP_WORK_DIR, path)
  
+/**
+ * enable_all_controllers() - Enable all available cgroup v2 controllers
+ * @cgroup_path: cgroup directory containing cgroup.controllers
+ *
+ * Enable all available cgroup v2 controllers in order to increase
+ * the code coverage.
+ * Returns 0 on success or if no controllers are listed, 1 on failure.
+ */
+int enable_all_controllers(char *cgroup_path)
+{
+       char path[PATH_MAX + 1];
+       char buf[PATH_MAX];
+       char *c, *c2;
+       int fd, cfd;
+       ssize_t len;    /* signed: read() returns -1 on error */
+
+       snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
+       fd = open(path, O_RDONLY);
+       if (fd < 0) {
+               log_err("Opening cgroup.controllers: %s", path);
+               return 1;
+       }
+
+       len = read(fd, buf, sizeof(buf) - 1);
+       if (len < 0) {
+               close(fd);
+               log_err("Reading cgroup.controllers: %s", path);
+               return 1;
+       }
+       buf[len] = 0;
+       close(fd);
+
+       /* No controllers available? We're probably on cgroup v1. */
+       if (len == 0)
+               return 0;
+
+       snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
+       cfd = open(path, O_RDWR);
+       if (cfd < 0) {
+               log_err("Opening cgroup.subtree_control: %s", path);
+               return 1;
+       }
+
+       for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+               if (dprintf(cfd, "+%s\n", c) <= 0) {
+                       log_err("Enabling controller %s: %s", c, path);
+                       close(cfd);
+                       return 1;
+               }
+       }
+       close(cfd);
+       return 0;
+}
+
  /**
   * setup_cgroup_environment() - Setup the cgroup environment
   *
@@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
                 return 1;
         }
  
+       if (enable_all_controllers(cgroup_workdir))
+               return 1;
+
         return 0;
  }
  
diff --git a/samples/bpf/test_cgrp2_attach2.c b/tools/testing/selftests/bpf/test_cgroup_attach.c

similarity index 79%

rename from samples/bpf/test_cgrp2_attach2.c

rename to tools/testing/selftests/bpf/test_cgroup_attach.c

index 0bb6507256b77dacd5ffa18f407ffc8a2d021b5d..7671909ee1cbdd4d140fa7836ede3895f59c0eb0 100644 (file)
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
  /* eBPF example program:
   *
   * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
@@ -25,20 +27,27 @@
  #include <sys/resource.h>
  #include <sys/time.h>
  #include <unistd.h>
+#include <linux/filter.h>
  
  #include <linux/bpf.h>
  #include <bpf/bpf.h>
  
-#include "bpf_insn.h"
+#include "bpf_util.h"
  #include "bpf_rlimit.h"
  #include "cgroup_helpers.h"
  
  #define FOO            "/foo"
  #define BAR            "/foo/bar/"
-#define PING_CMD       "ping -c1 -w1 127.0.0.1 > /dev/null"
+#define PING_CMD       "ping -q -c1 -w1 127.0.0.1 > /dev/null"
  
  char bpf_log_buf[BPF_LOG_BUF_SIZE];
  
+#ifdef DEBUG
+#define debug(args...) printf(args)
+#else
+#define debug(args...)
+#endif
+
  static int prog_load(int verdict)
  {
         int ret;
@@ -89,7 +98,7 @@ static int test_foo_bar(void)
                 goto err;
         }
  
-       printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
+       debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
         assert(system(PING_CMD) != 0);
  
         /* Create cgroup /foo/bar, get fd, and join it */
@@ -100,7 +109,7 @@ static int test_foo_bar(void)
         if (join_cgroup(BAR))
                 goto err;
  
-       printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
+       debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
         assert(system(PING_CMD) != 0);
  
         if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
@@ -109,7 +118,7 @@ static int test_foo_bar(void)
                 goto err;
         }
  
-       printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
+       debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
         assert(system(PING_CMD) == 0);
  
         if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
@@ -117,7 +126,7 @@ static int test_foo_bar(void)
                 goto err;
         }
  
-       printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
+       debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
                "This ping in cgroup /foo/bar should fail...\n");
         assert(system(PING_CMD) != 0);
  
@@ -132,7 +141,7 @@ static int test_foo_bar(void)
                 goto err;
         }
  
-       printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
+       debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
                "This ping in cgroup /foo/bar should pass...\n");
         assert(system(PING_CMD) == 0);
  
@@ -199,9 +208,9 @@ static int test_foo_bar(void)
         close(bar);
         cleanup_cgroup_environment();
         if (!rc)
-               printf("### override:PASS\n");
+               printf("#override:PASS\n");
         else
-               printf("### override:FAIL\n");
+               printf("#override:FAIL\n");
         return rc;
  }
  
@@ -441,19 +450,122 @@ static int test_multiprog(void)
         close(cg5);
         cleanup_cgroup_environment();
         if (!rc)
-               printf("### multi:PASS\n");
+               printf("#multi:PASS\n");
         else
-               printf("### multi:FAIL\n");
+               printf("#multi:FAIL\n");
         return rc;
  }
  
-int main(int argc, char **argv)
+static int test_autodetach(void)
  {
-       int rc = 0;
+       __u32 prog_cnt = 4, attach_flags;
+       int allow_prog[2] = {0};
+       __u32 prog_ids[2] = {0};
+       int cg = 0, i, rc = -1;
+       void *ptr = NULL;
+       int attempts;
+
+       for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+               allow_prog[i] = prog_load_cnt(1, 1 << i);
+               if (!allow_prog[i])
+                       goto err;
+       }
+
+       if (setup_cgroup_environment())
+               goto err;
+
+       /* create a cgroup, attach two programs and remember their ids */
+       cg = create_and_get_cgroup("/cg_autodetach");
+       if (cg < 0)
+               goto err;
+
+       if (join_cgroup("/cg_autodetach"))
+               goto err;
+
+       for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+               if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
+                                   BPF_F_ALLOW_MULTI)) {
+                       log_err("Attaching prog[%d] to cg:egress", i);
+                       goto err;
+               }
+       }
+
+       /* make sure that programs are attached and run some traffic */
+       assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
+                             prog_ids, &prog_cnt) == 0);
+       assert(system(PING_CMD) == 0);
+
+       /* allocate some memory (4Mb) to pin the original cgroup */
+       ptr = malloc(4 * (1 << 20));
+       if (!ptr)
+               goto err;
+
+       /* close programs and cgroup fd */
+       for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+               close(allow_prog[i]);
+               allow_prog[i] = 0;
+       }
+
+       close(cg);
+       cg = 0;
  
-       rc = test_foo_bar();
-       if (rc)
-               return rc;
+       /* leave the cgroup and remove it. don't detach programs */
+       cleanup_cgroup_environment();
+
+       /* wait for the asynchronous auto-detachment.
+        * wait for no more than 5 sec and give up.
+        */
+       for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
+               for (attempts = 5; attempts >= 0; attempts--) {
+                       int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
+
+                       if (fd < 0)
+                               break;
+
+                       /* don't leave the fd open */
+                       close(fd);
+
+                       if (!attempts)
+                               goto err;
+
+                       sleep(1);
+               }
+       }
+
+       rc = 0;
+err:
+       for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+               if (allow_prog[i] > 0)
+                       close(allow_prog[i]);
+       if (cg)
+               close(cg);
+       free(ptr);
+       cleanup_cgroup_environment();
+       if (!rc)
+               printf("#autodetach:PASS\n");
+       else
+               printf("#autodetach:FAIL\n");
+       return rc;
+}
+
+int main(void)
+{
+       int (*tests[])(void) = {
+               test_foo_bar,
+               test_multiprog,
+               test_autodetach,
+       };
+       int errors = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(tests); i++)
+               if (tests[i]())
+                       errors++;
+
+       if (errors)
+               printf("test_cgroup_attach:FAIL\n");
+       else
+               printf("test_cgroup_attach:PASS\n");
  
-       return test_multiprog();
+       return errors ? EXIT_FAILURE : EXIT_SUCCESS;
  }
author	Alexei Starovoitov <ast@kernel.org>
	Tue, 28 May 2019 16:30:03 +0000 (09:30 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Tue, 28 May 2019 16:30:03 +0000 (09:30 -0700)
include/linux/bpf-cgroup.h		patch \| blob \| history
include/linux/cgroup.h		patch \| blob \| history
kernel/bpf/cgroup.c		patch \| blob \| history
kernel/cgroup/cgroup.c		patch \| blob \| history
samples/bpf/Makefile		patch \| blob \| history
tools/testing/selftests/bpf/.gitignore		patch \| blob \| history
tools/testing/selftests/bpf/Makefile		patch \| blob \| history
tools/testing/selftests/bpf/cgroup_helpers.c		patch \| blob \| history
tools/testing/selftests/bpf/test_cgroup_attach.c	[moved from samples/bpf/test_cgrp2_attach2.c with 79% similarity]	patch \| blob \| history