diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e3461ec5957060aef7db6660e6795674c25901c6..a91ad518c050335be6c751296c42149752e7c2fe 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,6 +23,7 @@
 #include <linux/timekeeping.h>
 #include <linux/ctype.h>
 #include <linux/nospec.h>
+#include <linux/audit.h>
 #include <uapi/linux/btf.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -128,6 +129,152 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
        return map;
 }
 
+static u32 bpf_map_value_size(struct bpf_map *map)
+{
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+           map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+               return round_up(map->value_size, 8) * num_possible_cpus();
+       else if (IS_FD_MAP(map))
+               return sizeof(u32);
+       else
+               return map->value_size;
+}
+
+static void maybe_wait_bpf_programs(struct bpf_map *map)
+{
+       /* Wait for any running BPF programs to complete so that
+        * userspace, when we return to it, knows that all programs
+        * that could be running use the new map value.
+        */
+       if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
+           map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+               synchronize_rcu();
+}
+
+static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
+                               void *value, __u64 flags)
+{
+       int err;
+
+       /* Need to create a kthread, thus must support schedule */
+       if (bpf_map_is_dev_bound(map)) {
+               return bpf_map_offload_update_elem(map, key, value, flags);
+       } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+                  map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+                  map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+                  map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               return map->ops->map_update_elem(map, key, value, flags);
+       } else if (IS_FD_PROG_ARRAY(map)) {
+               return bpf_fd_array_map_update_elem(map, f.file, key, value,
+                                                   flags);
+       }
+
+       /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+        * inside bpf map update or delete otherwise deadlocks are possible
+        */
+       preempt_disable();
+       __this_cpu_inc(bpf_prog_active);
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               err = bpf_percpu_hash_update(map, key, value, flags);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+               err = bpf_percpu_array_update(map, key, value, flags);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+               err = bpf_percpu_cgroup_storage_update(map, key, value,
+                                                      flags);
+       } else if (IS_FD_ARRAY(map)) {
+               rcu_read_lock();
+               err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+                                                  flags);
+               rcu_read_unlock();
+       } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+               rcu_read_lock();
+               err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+                                                 flags);
+               rcu_read_unlock();
+       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+               /* rcu_read_lock() is not needed */
+               err = bpf_fd_reuseport_array_update_elem(map, key, value,
+                                                        flags);
+       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+                  map->map_type == BPF_MAP_TYPE_STACK) {
+               err = map->ops->map_push_elem(map, value, flags);
+       } else {
+               rcu_read_lock();
+               err = map->ops->map_update_elem(map, key, value, flags);
+               rcu_read_unlock();
+       }
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       maybe_wait_bpf_programs(map);
+
+       return err;
+}
+
+static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
+                             __u64 flags)
+{
+       void *ptr;
+       int err;
+
+       if (bpf_map_is_dev_bound(map))
+               return bpf_map_offload_lookup_elem(map, key, value);
+
+       preempt_disable();
+       this_cpu_inc(bpf_prog_active);
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               err = bpf_percpu_hash_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+               err = bpf_percpu_array_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+               err = bpf_percpu_cgroup_storage_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
+               err = bpf_stackmap_copy(map, key, value);
+       } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
+               err = bpf_fd_array_map_lookup_elem(map, key, value);
+       } else if (IS_FD_HASH(map)) {
+               err = bpf_fd_htab_map_lookup_elem(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+               err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+                  map->map_type == BPF_MAP_TYPE_STACK) {
+               err = map->ops->map_peek_elem(map, value);
+       } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               /* struct_ops map requires directly updating "value" */
+               err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+       } else {
+               rcu_read_lock();
+               if (map->ops->map_lookup_elem_sys_only)
+                       ptr = map->ops->map_lookup_elem_sys_only(map, key);
+               else
+                       ptr = map->ops->map_lookup_elem(map, key);
+               if (IS_ERR(ptr)) {
+                       err = PTR_ERR(ptr);
+               } else if (!ptr) {
+                       err = -ENOENT;
+               } else {
+                       err = 0;
+                       if (flags & BPF_F_LOCK)
+                               /* lock 'ptr' and copy everything but lock */
+                               copy_map_value_locked(map, value, ptr, true);
+                       else
+                               copy_map_value(map, value, ptr);
+                       /* mask lock, since value wasn't zero inited */
+                       check_and_init_map_lock(map, value);
+               }
+               rcu_read_unlock();
+       }
+
+       this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       maybe_wait_bpf_programs(map);
+
+       return err;
+}
+
 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
 {
        /* We really just want to fail instead of triggering OOM killer
@@ -627,7 +774,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
        return ret;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -641,6 +788,14 @@ static int map_create(union bpf_attr *attr)
        if (err)
                return -EINVAL;
 
+       if (attr->btf_vmlinux_value_type_id) {
+               if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
+                   attr->btf_key_type_id || attr->btf_value_type_id)
+                       return -EINVAL;
+       } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+               return -EINVAL;
+       }
+
        f_flags = bpf_get_file_flag(attr->map_flags);
        if (f_flags < 0)
                return f_flags;
@@ -663,32 +818,35 @@ static int map_create(union bpf_attr *attr)
        atomic64_set(&map->usercnt, 1);
        mutex_init(&map->freeze_mutex);
 
-       if (attr->btf_key_type_id || attr->btf_value_type_id) {
+       map->spin_lock_off = -EINVAL;
+       if (attr->btf_key_type_id || attr->btf_value_type_id ||
+           /* Even if the map's value is a kernel struct,
+            * the bpf_prog.o must have BTF to begin with
+            * to figure out the corresponding kernel
+            * counterpart.  Thus, attr->btf_fd has
+            * to be valid also.
+            */
+           attr->btf_vmlinux_value_type_id) {
                struct btf *btf;
 
-               if (!attr->btf_value_type_id) {
-                       err = -EINVAL;
-                       goto free_map;
-               }
-
                btf = btf_get_by_fd(attr->btf_fd);
                if (IS_ERR(btf)) {
                        err = PTR_ERR(btf);
                        goto free_map;
                }
+               map->btf = btf;
 
-               err = map_check_btf(map, btf, attr->btf_key_type_id,
-                                   attr->btf_value_type_id);
-               if (err) {
-                       btf_put(btf);
-                       goto free_map;
+               if (attr->btf_value_type_id) {
+                       err = map_check_btf(map, btf, attr->btf_key_type_id,
+                                           attr->btf_value_type_id);
+                       if (err)
+                               goto free_map;
                }
 
-               map->btf = btf;
                map->btf_key_type_id = attr->btf_key_type_id;
                map->btf_value_type_id = attr->btf_value_type_id;
-       } else {
-               map->spin_lock_off = -EINVAL;
+               map->btf_vmlinux_value_type_id =
+                       attr->btf_vmlinux_value_type_id;
        }
 
        err = security_bpf_map_alloc(map);
@@ -815,7 +973,7 @@ static int map_lookup_elem(union bpf_attr *attr)
        void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
-       void *key, *value, *ptr;
+       void *key, *value;
        u32 value_size;
        struct fd f;
        int err;
@@ -847,72 +1005,14 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto err_put;
        }
 
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-           map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
-               value_size = round_up(map->value_size, 8) * num_possible_cpus();
-       else if (IS_FD_MAP(map))
-               value_size = sizeof(u32);
-       else
-               value_size = map->value_size;
+       value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
-       if (bpf_map_is_dev_bound(map)) {
-               err = bpf_map_offload_lookup_elem(map, key, value);
-               goto done;
-       }
-
-       preempt_disable();
-       this_cpu_inc(bpf_prog_active);
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-               err = bpf_percpu_hash_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-               err = bpf_percpu_array_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
-               err = bpf_percpu_cgroup_storage_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
-               err = bpf_stackmap_copy(map, key, value);
-       } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
-               err = bpf_fd_array_map_lookup_elem(map, key, value);
-       } else if (IS_FD_HASH(map)) {
-               err = bpf_fd_htab_map_lookup_elem(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
-               err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-                  map->map_type == BPF_MAP_TYPE_STACK) {
-               err = map->ops->map_peek_elem(map, value);
-       } else {
-               rcu_read_lock();
-               if (map->ops->map_lookup_elem_sys_only)
-                       ptr = map->ops->map_lookup_elem_sys_only(map, key);
-               else
-                       ptr = map->ops->map_lookup_elem(map, key);
-               if (IS_ERR(ptr)) {
-                       err = PTR_ERR(ptr);
-               } else if (!ptr) {
-                       err = -ENOENT;
-               } else {
-                       err = 0;
-                       if (attr->flags & BPF_F_LOCK)
-                               /* lock 'ptr' and copy everything but lock */
-                               copy_map_value_locked(map, value, ptr, true);
-                       else
-                               copy_map_value(map, value, ptr);
-                       /* mask lock, since value wasn't zero inited */
-                       check_and_init_map_lock(map, value);
-               }
-               rcu_read_unlock();
-       }
-       this_cpu_dec(bpf_prog_active);
-       preempt_enable();
-
-done:
+       err = bpf_map_copy_value(map, key, value, attr->flags);
        if (err)
                goto free_value;
 
@@ -931,16 +1031,6 @@ static int map_lookup_elem(union bpf_attr *attr)
        return err;
 }
 
-static void maybe_wait_bpf_programs(struct bpf_map *map)
-{
-       /* Wait for any running BPF programs to complete so that
-        * userspace, when we return to it, knows that all programs
-        * that could be running use the new map value.
-        */
-       if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
-           map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
-               synchronize_rcu();
-}
 
 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 
@@ -996,60 +1086,8 @@ static int map_update_elem(union bpf_attr *attr)
        if (copy_from_user(value, uvalue, value_size) != 0)
                goto free_value;
 
-       /* Need to create a kthread, thus must support schedule */
-       if (bpf_map_is_dev_bound(map)) {
-               err = bpf_map_offload_update_elem(map, key, value, attr->flags);
-               goto out;
-       } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
-                  map->map_type == BPF_MAP_TYPE_SOCKHASH ||
-                  map->map_type == BPF_MAP_TYPE_SOCKMAP) {
-               err = map->ops->map_update_elem(map, key, value, attr->flags);
-               goto out;
-       } else if (IS_FD_PROG_ARRAY(map)) {
-               err = bpf_fd_array_map_update_elem(map, f.file, key, value,
-                                                  attr->flags);
-               goto out;
-       }
+       err = bpf_map_update_value(map, f, key, value, attr->flags);
 
-       /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
-        * inside bpf map update or delete otherwise deadlocks are possible
-        */
-       preempt_disable();
-       __this_cpu_inc(bpf_prog_active);
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-               err = bpf_percpu_hash_update(map, key, value, attr->flags);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-               err = bpf_percpu_array_update(map, key, value, attr->flags);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
-               err = bpf_percpu_cgroup_storage_update(map, key, value,
-                                                      attr->flags);
-       } else if (IS_FD_ARRAY(map)) {
-               rcu_read_lock();
-               err = bpf_fd_array_map_update_elem(map, f.file, key, value,
-                                                  attr->flags);
-               rcu_read_unlock();
-       } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-               rcu_read_lock();
-               err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
-                                                 attr->flags);
-               rcu_read_unlock();
-       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
-               /* rcu_read_lock() is not needed */
-               err = bpf_fd_reuseport_array_update_elem(map, key, value,
-                                                        attr->flags);
-       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-                  map->map_type == BPF_MAP_TYPE_STACK) {
-               err = map->ops->map_push_elem(map, value, attr->flags);
-       } else {
-               rcu_read_lock();
-               err = map->ops->map_update_elem(map, key, value, attr->flags);
-               rcu_read_unlock();
-       }
-       __this_cpu_dec(bpf_prog_active);
-       preempt_enable();
-       maybe_wait_bpf_programs(map);
-out:
 free_value:
        kfree(value);
 free_key:
@@ -1091,7 +1129,9 @@ static int map_delete_elem(union bpf_attr *attr)
        if (bpf_map_is_dev_bound(map)) {
                err = bpf_map_offload_delete_elem(map, key);
                goto out;
-       } else if (IS_FD_PROG_ARRAY(map)) {
+       } else if (IS_FD_PROG_ARRAY(map) ||
+                  map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               /* These maps require sleepable context */
                err = map->ops->map_delete_elem(map, key);
                goto out;
        }
@@ -1178,6 +1218,220 @@ static int map_get_next_key(union bpf_attr *attr)
        return err;
 }
 
+int generic_map_delete_batch(struct bpf_map *map,
+                            const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       void __user *keys = u64_to_user_ptr(attr->batch.keys);
+       u32 cp, max_count;
+       int err = 0;
+       void *key;
+
+       if (attr->batch.elem_flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
+       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               return -EINVAL;
+       }
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       if (!key)
+               return -ENOMEM;
+
+       for (cp = 0; cp < max_count; cp++) {
+               err = -EFAULT;
+               if (copy_from_user(key, keys + cp * map->key_size,
+                                  map->key_size))
+                       break;
+
+               if (bpf_map_is_dev_bound(map)) {
+                       err = bpf_map_offload_delete_elem(map, key);
+                       break;
+               }
+
+               preempt_disable();
+               __this_cpu_inc(bpf_prog_active);
+               rcu_read_lock();
+               err = map->ops->map_delete_elem(map, key);
+               rcu_read_unlock();
+               __this_cpu_dec(bpf_prog_active);
+               preempt_enable();
+               maybe_wait_bpf_programs(map);
+               if (err)
+                       break;
+       }
+       if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+               err = -EFAULT;
+
+       kfree(key);
+       return err;
+}
+
+int generic_map_update_batch(struct bpf_map *map,
+                            const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       void __user *values = u64_to_user_ptr(attr->batch.values);
+       void __user *keys = u64_to_user_ptr(attr->batch.keys);
+       u32 value_size, cp, max_count;
+       int ufd = attr->map_fd;
+       void *key, *value;
+       struct fd f;
+       int err = 0;
+
+       f = fdget(ufd);
+       if (attr->batch.elem_flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
+       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               return -EINVAL;
+       }
+
+       value_size = bpf_map_value_size(map);
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       if (!key)
+               return -ENOMEM;
+
+       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       if (!value) {
+               kfree(key);
+               return -ENOMEM;
+       }
+
+       for (cp = 0; cp < max_count; cp++) {
+               err = -EFAULT;
+               if (copy_from_user(key, keys + cp * map->key_size,
+                   map->key_size) ||
+                   copy_from_user(value, values + cp * value_size, value_size))
+                       break;
+
+               err = bpf_map_update_value(map, f, key, value,
+                                          attr->batch.elem_flags);
+
+               if (err)
+                       break;
+       }
+
+       if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+               err = -EFAULT;
+
+       kfree(value);
+       kfree(key);
+       return err;
+}
+
+#define MAP_LOOKUP_RETRIES 3
+
+int generic_map_lookup_batch(struct bpf_map *map,
+                            const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
+       void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+       void __user *values = u64_to_user_ptr(attr->batch.values);
+       void __user *keys = u64_to_user_ptr(attr->batch.keys);
+       void *buf, *buf_prevkey, *prev_key, *key, *value;
+       int err, retry = MAP_LOOKUP_RETRIES;
+       u32 value_size, cp, max_count;
+
+       if (attr->batch.elem_flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
+       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map))
+               return -EINVAL;
+
+       value_size = bpf_map_value_size(map);
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       if (put_user(0, &uattr->batch.count))
+               return -EFAULT;
+
+       buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       if (!buf_prevkey)
+               return -ENOMEM;
+
+       buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+       if (!buf) {
+               kvfree(buf_prevkey);
+               return -ENOMEM;
+       }
+
+       err = -EFAULT;
+       prev_key = NULL;
+       if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
+               goto free_buf;
+       key = buf;
+       value = key + map->key_size;
+       if (ubatch)
+               prev_key = buf_prevkey;
+
+       for (cp = 0; cp < max_count;) {
+               rcu_read_lock();
+               err = map->ops->map_get_next_key(map, prev_key, key);
+               rcu_read_unlock();
+               if (err)
+                       break;
+               err = bpf_map_copy_value(map, key, value,
+                                        attr->batch.elem_flags);
+
+               if (err == -ENOENT) {
+                       if (retry) {
+                               retry--;
+                               continue;
+                       }
+                       err = -EINTR;
+                       break;
+               }
+
+               if (err)
+                       goto free_buf;
+
+               if (copy_to_user(keys + cp * map->key_size, key,
+                                map->key_size)) {
+                       err = -EFAULT;
+                       goto free_buf;
+               }
+               if (copy_to_user(values + cp * value_size, value, value_size)) {
+                       err = -EFAULT;
+                       goto free_buf;
+               }
+
+               if (!prev_key)
+                       prev_key = buf_prevkey;
+
+               swap(prev_key, key);
+               retry = MAP_LOOKUP_RETRIES;
+               cp++;
+       }
+
+       if (err == -EFAULT)
+               goto free_buf;
+
+       if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
+                   (cp && copy_to_user(uobatch, prev_key, map->key_size))))
+               err = -EFAULT;
+
+free_buf:
+       kfree(buf_prevkey);
+       kfree(buf);
+       return err;
+}
+
 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
 
 static int map_lookup_and_delete_elem(union bpf_attr *attr)
@@ -1306,6 +1560,36 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
        return 0;
 }
 
+enum bpf_audit {
+       BPF_AUDIT_LOAD,
+       BPF_AUDIT_UNLOAD,
+       BPF_AUDIT_MAX,
+};
+
+static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
+       [BPF_AUDIT_LOAD]   = "LOAD",
+       [BPF_AUDIT_UNLOAD] = "UNLOAD",
+};
+
+static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
+{
+       struct audit_context *ctx = NULL;
+       struct audit_buffer *ab;
+
+       if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
+               return;
+       if (audit_enabled == AUDIT_OFF)
+               return;
+       if (op == BPF_AUDIT_LOAD)
+               ctx = audit_context();
+       ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
+       if (unlikely(!ab))
+               return;
+       audit_log_format(ab, "prog-id=%u op=%s",
+                        prog->aux->id, bpf_audit_str[op]);
+       audit_log_end(ab);
+}
+
 int __bpf_prog_charge(struct user_struct *user, u32 pages)
 {
        unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1421,6 +1705,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
        if (atomic64_dec_and_test(&prog->aux->refcnt)) {
                perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+               bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
                /* bpf_prog_free_id() must be called first */
                bpf_prog_free_id(prog, do_idr_lock);
                __bpf_prog_put_noref(prog, true);
@@ -1640,17 +1925,24 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                           enum bpf_attach_type expected_attach_type,
                           u32 btf_id, u32 prog_fd)
 {
-       switch (prog_type) {
-       case BPF_PROG_TYPE_TRACING:
+       if (btf_id) {
                if (btf_id > BTF_MAX_TYPE)
                        return -EINVAL;
-               break;
-       default:
-               if (btf_id || prog_fd)
+
+               switch (prog_type) {
+               case BPF_PROG_TYPE_TRACING:
+               case BPF_PROG_TYPE_STRUCT_OPS:
+               case BPF_PROG_TYPE_EXT:
+                       break;
+               default:
                        return -EINVAL;
-               break;
+               }
        }
 
+       if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
+           prog_type != BPF_PROG_TYPE_EXT)
+               return -EINVAL;
+
        switch (prog_type) {
        case BPF_PROG_TYPE_CGROUP_SOCK:
                switch (expected_attach_type) {
@@ -1691,6 +1983,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                default:
                        return -EINVAL;
                }
+       case BPF_PROG_TYPE_EXT:
+               if (expected_attach_type)
+                       return -EINVAL;
+               /* fallthrough */
        default:
                return 0;
        }
@@ -1830,6 +2126,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
         */
        bpf_prog_kallsyms_add(prog);
        perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
+       bpf_audit_prog(prog, BPF_AUDIT_LOAD);
 
        err = bpf_prog_new_fd(prog);
        if (err < 0)
@@ -1892,7 +2189,8 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
        int tr_fd, err;
 
        if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
-           prog->expected_attach_type != BPF_TRACE_FEXIT) {
+           prog->expected_attach_type != BPF_TRACE_FEXIT &&
+           prog->type != BPF_PROG_TYPE_EXT) {
                err = -EINVAL;
                goto out_put_prog;
        }
@@ -1959,12 +2257,14 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 
        if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
            prog->type != BPF_PROG_TYPE_TRACING &&
+           prog->type != BPF_PROG_TYPE_EXT &&
            prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
                err = -EINVAL;
                goto out_put_prog;
        }
 
-       if (prog->type == BPF_PROG_TYPE_TRACING) {
+       if (prog->type == BPF_PROG_TYPE_TRACING ||
+           prog->type == BPF_PROG_TYPE_EXT) {
                if (attr->raw_tracepoint.name) {
                        /* The attach point for this category of programs
                         * should be specified via btf_id during program load.
@@ -2040,10 +2340,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
        }
 }
 
-#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
 
 #define BPF_F_ATTACH_MASK \
-       (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
+       (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
 
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
@@ -2305,17 +2605,12 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
 
 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
 
-static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+struct bpf_prog *bpf_prog_by_id(u32 id)
 {
        struct bpf_prog *prog;
-       u32 id = attr->prog_id;
-       int fd;
 
-       if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
-               return -EINVAL;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
+       if (!id)
+               return ERR_PTR(-ENOENT);
 
        spin_lock_bh(&prog_idr_lock);
        prog = idr_find(&prog_idr, id);
@@ -2324,7 +2619,22 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
        else
                prog = ERR_PTR(-ENOENT);
        spin_unlock_bh(&prog_idr_lock);
+       return prog;
+}
+
+static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+{
+       struct bpf_prog *prog;
+       u32 id = attr->prog_id;
+       int fd;
+
+       if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
+               return -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
 
+       prog = bpf_prog_by_id(id);
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
@@ -2774,6 +3084,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
                info.btf_key_type_id = map->btf_key_type_id;
                info.btf_value_type_id = map->btf_value_type_id;
        }
+       info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
 
        if (bpf_map_is_dev_bound(map)) {
                err = bpf_map_offload_info_fill(&info, map);
@@ -2986,6 +3297,61 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
        return err;
 }
 
+#define BPF_MAP_BATCH_LAST_FIELD batch.flags
+
+#define BPF_DO_BATCH(fn)                       \
+       do {                                    \
+               if (!fn) {                      \
+                       err = -ENOTSUPP;        \
+                       goto err_put;           \
+               }                               \
+               err = fn(map, attr, uattr);     \
+       } while (0)
+
+static int bpf_map_do_batch(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr,
+                           int cmd)
+{
+       struct bpf_map *map;
+       int err, ufd;
+       struct fd f;
+
+       if (CHECK_ATTR(BPF_MAP_BATCH))
+               return -EINVAL;
+
+       ufd = attr->batch.map_fd;
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       if ((cmd == BPF_MAP_LOOKUP_BATCH ||
+            cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
+           !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+               err = -EPERM;
+               goto err_put;
+       }
+
+       if (cmd != BPF_MAP_LOOKUP_BATCH &&
+           !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+               err = -EPERM;
+               goto err_put;
+       }
+
+       if (cmd == BPF_MAP_LOOKUP_BATCH)
+               BPF_DO_BATCH(map->ops->map_lookup_batch);
+       else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
+               BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
+       else if (cmd == BPF_MAP_UPDATE_BATCH)
+               BPF_DO_BATCH(map->ops->map_update_batch);
+       else
+               BPF_DO_BATCH(map->ops->map_delete_batch);
+
+err_put:
+       fdput(f);
+       return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -3083,6 +3449,19 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
                err = map_lookup_and_delete_elem(&attr);
                break;
+       case BPF_MAP_LOOKUP_BATCH:
+               err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
+               break;
+       case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
+               err = bpf_map_do_batch(&attr, uattr,
+                                      BPF_MAP_LOOKUP_AND_DELETE_BATCH);
+               break;
+       case BPF_MAP_UPDATE_BATCH:
+               err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
+               break;
+       case BPF_MAP_DELETE_BATCH:
+               err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
+               break;
        default:
                err = -EINVAL;
                break;
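
For reference, below is a minimal userspace sketch of driving the batch lookup interface introduced in this patch through the raw bpf(2) syscall. It assumes UAPI headers recent enough to define BPF_MAP_LOOKUP_BATCH and the bpf_attr batch fields referenced in the hunks above (in_batch, out_batch, keys, values, count, map_fd). The map_lookup_batch() and dump_map() helpers, and the __u32-key/__u64-value map layout, are illustrative assumptions only, not part of the patch; per generic_map_lookup_batch() above, out_batch receives a key-sized resume token, the kernel writes the number of copied elements back into batch.count, and iteration ends when the syscall fails with ENOENT.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Thin wrapper over bpf(2) for BPF_MAP_LOOKUP_BATCH.  'in_batch' is NULL on
 * the first call; 'out_batch' receives the token to pass back as 'in_batch'
 * on the next call.  '*count' is the buffer capacity on input and the number
 * of elements copied on output.
 */
int map_lookup_batch(int map_fd, void *in_batch, void *out_batch,
                     void *keys, void *values, __u32 *count)
{
        union bpf_attr attr;
        int err;

        memset(&attr, 0, sizeof(attr));
        attr.batch.map_fd    = map_fd;
        attr.batch.in_batch  = (__u64)(unsigned long)in_batch;
        attr.batch.out_batch = (__u64)(unsigned long)out_batch;
        attr.batch.keys      = (__u64)(unsigned long)keys;
        attr.batch.values    = (__u64)(unsigned long)values;
        attr.batch.count     = *count;

        err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH, &attr, sizeof(attr));
        *count = attr.batch.count;      /* elements actually copied back */
        return err;
}

/* Walk a hypothetical map with __u32 keys and __u64 values, 64 at a time.
 * The resume token is key-sized for the generic implementation, so a __u32
 * suffices here.  The final call fails with ENOENT once the map is
 * exhausted, possibly after copying a last partial batch.
 */
void dump_map(int map_fd)
{
        __u32 keys[64];
        __u64 values[64];
        __u32 out_batch, count;
        void *in_batch = NULL;
        int err;

        do {
                count = 64;
                err = map_lookup_batch(map_fd, in_batch, &out_batch,
                                       keys, values, &count);
                if (err && errno != ENOENT)
                        break;
                for (__u32 i = 0; i < count; i++)
                        printf("key %u -> value %llu\n",
                               keys[i], (unsigned long long)values[i]);
                in_batch = &out_batch;  /* resume after the last returned key */
        } while (!err);

        if (err && errno != ENOENT)
                perror("BPF_MAP_LOOKUP_BATCH");
}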