/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>

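/* Maps whose values are file descriptors at update time (program arrays,
 * perf event arrays, cgroup arrays, and maps of maps) need special
 * lookup/update handling further down in this file; the helpers below
 * classify them.
 */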
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

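/* Table of map ops, indexed by map type. Each BPF_MAP_TYPE() entry in
 * <linux/bpf_types.h> expands to one initializer below, so adding a map
 * type there automatically registers it here.
 */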
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

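/* Map areas are allocated with kmalloc() when the request is small enough
 * and fall back to __vmalloc() otherwise, so large maps do not depend on
 * physically contiguous memory.
 */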
void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

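/* Map memory is accounted against the owning user's locked_vm and bounded
 * by RLIMIT_MEMLOCK. bpf_map_precharge_memlock() only checks the limit;
 * bpf_map_charge_memlock() actually charges the pages and records the user
 * in map->user so the charge can be undone when the map is freed.
 */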
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

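/* Every map gets an id from map_idr so it can be iterated and looked up
 * from user space via BPF_MAP_GET_NEXT_ID/BPF_MAP_GET_FD_BY_ID below.
 * GFP_ATOMIC is used because the idr is manipulated under a spinlock.
 */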
static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	if (do_idr_lock)
		spin_lock_bh(&map_idr_lock);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_bh(&map_idr_lock);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

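/* Expose map attributes via /proc/<pid>/fdinfo/<fd> so tools can inspect
 * a map given nothing but its file descriptor.
 */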
#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

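/* For example, CHECK_ATTR(BPF_MAP_CREATE), with the _LAST_FIELD definition
 * below, scans every byte of *attr past attr->inner_map_fd and evaluates to
 * true if any of them is non-zero, i.e. if user space set fields this
 * command does not understand.
 */
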
#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

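/* The element commands below share a common shape: validate the attr,
 * resolve map_fd, copy the key (and value, for updates) in from user
 * memory, dispatch on the map type, and copy results back out.
 */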
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

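/* For per-cpu maps the value copied in and out is an array of per-cpu
 * copies, each padded to 8 bytes, which is why value_size above and below
 * is round_up(value_size, 8) * num_possible_cpus().
 */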
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

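/* __bpf_prog_charge()/__bpf_prog_uncharge() take an explicit user and
 * tolerate NULL, so the locked_vm accounting can be reused by callers
 * outside of bpf_prog_charge_memlock().
 */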
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

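/* BPF_PROG_LOAD: copy the license and instructions in from user space, run
 * the verifier, JIT the program, publish it in prog_idr, and finally hand
 * the caller an fd. Note the ordering on the error path: once the id has
 * been published, the prog must die via bpf_prog_put(), not bpf_prog_free().
 */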
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_flags

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

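/* BPF_OBJ_PIN/BPF_OBJ_GET pin an fd to a path and reopen it later, which
 * lets maps and programs outlive the process that created them.
 */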
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;
	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

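/* BPF_PROG_GET_NEXT_ID/BPF_MAP_GET_NEXT_ID let a privileged caller walk
 * all ids: start_id == 0 returns the first id, and feeding each result
 * back as start_id enumerates the rest until -ENOENT.
 */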
#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

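/* A newer user space may pass a bigger bpf_attr or bpf_*_info than this
 * kernel knows. That is tolerated as long as every trailing byte is zero,
 * which is what check_uarg_tail_zero() verifies.
 */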
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;

	memcpy(info.tag, prog->tag, sizeof(prog->tag));

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.jited_prog_insns);
		ulen = min_t(u32, info.jited_prog_len, ulen);
		if (copy_to_user(uinsns, prog->bpf_func, ulen))
			return -EFAULT;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_size(prog->len);
	if (info.xlated_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		if (copy_to_user(uinsns, prog->insnsi, ulen))
			return -EFAULT;
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

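/* Single entry point for all BPF commands. The attr union is copied in
 * (with the forward-compatibility check described above) and dispatched
 * per cmd.
 */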
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}