diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 237f3d6a7ddc7ad7db83dbec73c27c8ca67dc0ea..bbb016adbaeb61c5d46a525f3e7235d11535335e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
 #include <linux/bpf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mmzone.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
 #include <linux/license.h>
 #include <linux/filter.h>
 #include <linux/version.h>
+#include <linux/kernel.h>
 
 DEFINE_PER_CPU(int, bpf_prog_active);
 
@@ -48,6 +51,30 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
        list_add(&tl->list_node, &bpf_map_types);
 }
 
+void *bpf_map_area_alloc(size_t size)
+{
+       /* We definitely need __GFP_NORETRY, so OOM killer doesn't
+        * trigger under memory pressure as we really just want to
+        * fail instead.
+        */
+       const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+       void *area;
+
+       if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+               area = kmalloc(size, GFP_USER | flags);
+               if (area != NULL)
+                       return area;
+       }
+
+       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
+                        PAGE_KERNEL);
+}
+
+void bpf_map_area_free(void *area)
+{
+       kvfree(area);
+}
+
 int bpf_map_precharge_memlock(u32 pages)
 {
        struct user_struct *user = get_current_user();
@@ -137,18 +164,31 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
        const struct bpf_map *map = filp->private_data;
+       const struct bpf_array *array;
+       u32 owner_prog_type = 0;
+
+       if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+               array = container_of(map, struct bpf_array, map);
+               owner_prog_type = array->owner_prog_type;
+       }
 
        seq_printf(m,
                   "map_type:\t%u\n"
                   "key_size:\t%u\n"
                   "value_size:\t%u\n"
                   "max_entries:\t%u\n"
-                  "map_flags:\t%#x\n",
+                  "map_flags:\t%#x\n"
+                  "memlock:\t%llu\n",
                   map->map_type,
                   map->key_size,
                   map->value_size,
                   map->max_entries,
-                  map->map_flags);
+                  map->map_flags,
+                  map->pages * 1ULL << PAGE_SHIFT);
+
+       if (owner_prog_type)
+               seq_printf(m, "owner_prog_type:\t%u\n",
+                          owner_prog_type);
 }
 #endif
 
@@ -254,12 +294,6 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
        return map;
 }
 
-/* helper to convert user pointers passed inside __aligned_u64 fields */
-static void __user *u64_to_ptr(__u64 val)
-{
-       return (void __user *) (unsigned long) val;
-}
-
 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 {
        return -ENOTSUPP;
@@ -270,8 +304,8 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
-       void __user *uvalue = u64_to_ptr(attr->value);
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value, *ptr;
@@ -297,6 +331,7 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto free_key;
 
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
@@ -307,7 +342,8 @@ static int map_lookup_elem(union bpf_attr *attr)
        if (!value)
                goto free_key;
 
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_copy(map, key, value);
@@ -344,8 +380,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 
 static int map_update_elem(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
-       void __user *uvalue = u64_to_ptr(attr->value);
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
@@ -371,6 +407,7 @@ static int map_update_elem(union bpf_attr *attr)
                goto free_key;
 
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
@@ -390,7 +427,8 @@ static int map_update_elem(union bpf_attr *attr)
         */
        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_update(map, key, value, attr->flags);
@@ -422,7 +460,7 @@ static int map_update_elem(union bpf_attr *attr)
 
 static int map_delete_elem(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
+       void __user *ukey = u64_to_user_ptr(attr->key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        struct fd f;
@@ -466,8 +504,8 @@ static int map_delete_elem(union bpf_attr *attr)
 
 static int map_get_next_key(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
-       void __user *unext_key = u64_to_ptr(attr->next_key);
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *unext_key = u64_to_user_ptr(attr->next_key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *next_key;
@@ -567,6 +605,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
                                prog->dst_needed = 1;
                        if (insn->imm == BPF_FUNC_get_prandom_u32)
                                bpf_user_rnd_init_once();
+                       if (insn->imm == BPF_FUNC_xdp_adjust_head)
+                               prog->xdp_adjust_head = 1;
                        if (insn->imm == BPF_FUNC_tail_call) {
                                /* mark bpf_tail_call as different opcode
                                 * to avoid conditional branch in
@@ -601,19 +641,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
        kfree(aux->used_maps);
 }
 
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+       unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       unsigned long user_bufs;
+
+       if (user) {
+               user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+               if (user_bufs > memlock_limit) {
+                       atomic_long_sub(pages, &user->locked_vm);
+                       return -EPERM;
+               }
+       }
+
+       return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+       if (user)
+               atomic_long_sub(pages, &user->locked_vm);
+}
+
 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 {
        struct user_struct *user = get_current_user();
-       unsigned long memlock_limit;
-
-       memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       int ret;
 
-       atomic_long_add(prog->pages, &user->locked_vm);
-       if (atomic_long_read(&user->locked_vm) > memlock_limit) {
-               atomic_long_sub(prog->pages, &user->locked_vm);
+       ret = __bpf_prog_charge(user, prog->pages);
+       if (ret) {
                free_uid(user);
-               return -EPERM;
+               return ret;
        }
+
        prog->aux->user = user;
        return 0;
 }
@@ -622,7 +682,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 {
        struct user_struct *user = prog->aux->user;
 
-       atomic_long_sub(prog->pages, &user->locked_vm);
+       __bpf_prog_uncharge(user, prog->pages);
        free_uid(user);
 }
 
@@ -650,8 +710,30 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
+#ifdef CONFIG_PROC_FS
+static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+       const struct bpf_prog *prog = filp->private_data;
+       char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+
+       bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+       seq_printf(m,
+                  "prog_type:\t%u\n"
+                  "prog_jited:\t%u\n"
+                  "prog_tag:\t%s\n"
+                  "memlock:\t%llu\n",
+                  prog->type,
+                  prog->jited,
+                  prog_tag,
+                  prog->pages * 1ULL << PAGE_SHIFT);
+}
+#endif
+
 static const struct file_operations bpf_prog_fops = {
-        .release = bpf_prog_release,
+#ifdef CONFIG_PROC_FS
+       .show_fdinfo    = bpf_prog_show_fdinfo,
+#endif
+       .release        = bpf_prog_release,
 };
 
 int bpf_prog_new_fd(struct bpf_prog *prog)
@@ -682,10 +764,22 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_add);
 
+void bpf_prog_sub(struct bpf_prog *prog, int i)
+{
+       /* Only to be used for undoing previous bpf_prog_add() in some
+        * error path. We still know that another entity in our call
+        * path holds a reference to the program, thus atomic_sub() can
+        * be safely used in such cases!
+        */
+       WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
+}
+EXPORT_SYMBOL_GPL(bpf_prog_sub);
+
 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
 {
        return bpf_prog_add(prog, 1);
 }
+EXPORT_SYMBOL_GPL(bpf_prog_inc);
 
 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 {
@@ -732,7 +826,7 @@ static int bpf_prog_load(union bpf_attr *attr)
                return -EINVAL;
 
        /* copy eBPF program license from user space */
-       if (strncpy_from_user(license, u64_to_ptr(attr->license),
+       if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
                              sizeof(license) - 1) < 0)
                return -EFAULT;
        license[sizeof(license) - 1] = 0;
@@ -740,8 +834,8 @@ static int bpf_prog_load(union bpf_attr *attr)
        /* eBPF programs must be GPL compatible to use GPL-ed functions */
        is_gpl = license_is_gpl_compatible(license);
 
-       if (attr->insn_cnt >= BPF_MAXINSNS)
-               return -EINVAL;
+       if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
+               return -E2BIG;
 
        if (type == BPF_PROG_TYPE_KPROBE &&
            attr->kern_version != LINUX_VERSION_CODE)
@@ -762,8 +856,8 @@ static int bpf_prog_load(union bpf_attr *attr)
        prog->len = attr->insn_cnt;
 
        err = -EFAULT;
-       if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
-                          prog->len * sizeof(struct bpf_insn)) != 0)
+       if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
+                          bpf_prog_insn_size(prog)) != 0)
                goto free_prog;
 
        prog->orig_prog = NULL;
@@ -813,7 +907,7 @@ static int bpf_obj_pin(const union bpf_attr *attr)
        if (CHECK_ATTR(BPF_OBJ))
                return -EINVAL;
 
-       return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
+       return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
 }
 
 static int bpf_obj_get(const union bpf_attr *attr)
@@ -821,9 +915,93 @@ static int bpf_obj_get(const union bpf_attr *attr)
        if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
                return -EINVAL;
 
-       return bpf_obj_get_user(u64_to_ptr(attr->pathname));
+       return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
 }
 
+#ifdef CONFIG_CGROUP_BPF
+
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+
+static int bpf_prog_attach(const union bpf_attr *attr)
+{
+       enum bpf_prog_type ptype;
+       struct bpf_prog *prog;
+       struct cgroup *cgrp;
+       int ret;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (CHECK_ATTR(BPF_PROG_ATTACH))
+               return -EINVAL;
+
+       if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+               return -EINVAL;
+
+       switch (attr->attach_type) {
+       case BPF_CGROUP_INET_INGRESS:
+       case BPF_CGROUP_INET_EGRESS:
+               ptype = BPF_PROG_TYPE_CGROUP_SKB;
+               break;
+       case BPF_CGROUP_INET_SOCK_CREATE:
+               ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       cgrp = cgroup_get_from_fd(attr->target_fd);
+       if (IS_ERR(cgrp)) {
+               bpf_prog_put(prog);
+               return PTR_ERR(cgrp);
+       }
+
+       ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+                               attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+       if (ret)
+               bpf_prog_put(prog);
+       cgroup_put(cgrp);
+
+       return ret;
+}
+
+#define BPF_PROG_DETACH_LAST_FIELD attach_type
+
+static int bpf_prog_detach(const union bpf_attr *attr)
+{
+       struct cgroup *cgrp;
+       int ret;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (CHECK_ATTR(BPF_PROG_DETACH))
+               return -EINVAL;
+
+       switch (attr->attach_type) {
+       case BPF_CGROUP_INET_INGRESS:
+       case BPF_CGROUP_INET_EGRESS:
+       case BPF_CGROUP_INET_SOCK_CREATE:
+               cgrp = cgroup_get_from_fd(attr->target_fd);
+               if (IS_ERR(cgrp))
+                       return PTR_ERR(cgrp);
+
+               ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+               cgroup_put(cgrp);
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       return ret;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -890,6 +1068,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
+
+#ifdef CONFIG_CGROUP_BPF
+       case BPF_PROG_ATTACH:
+               err = bpf_prog_attach(&attr);
+               break;
+       case BPF_PROG_DETACH:
+               err = bpf_prog_detach(&attr);
+               break;
+#endif
+
        default:
                err = -EINVAL;
                break;