kernel/bpf/syscall.c

   1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of version 2 of the GNU General Public
   5  * License as published by the Free Software Foundation.
   6  *
   7  * This program is distributed in the hope that it will be useful, but
   8  * WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10  * General Public License for more details.
  11  */
  12 #include <linux/bpf.h>
  13 #include <linux/syscalls.h>
  14 #include <linux/slab.h>
  15 #include <linux/anon_inodes.h>
  16 #include <linux/file.h>
  17 #include <linux/license.h>
  18 #include <linux/filter.h>
  19 #include <linux/version.h>
  20 #include <linux/kernel.h>
  21
/* Per-CPU counter. The map update and delete paths in this file increment
 * it before calling into the map implementation and decrement it afterwards.
 */
DEFINE_PER_CPU(int, bpf_prog_active);

/* Checked at the top of the bpf() syscall: when non-zero, callers without
 * CAP_SYS_ADMIN get -EPERM.
 */
int sysctl_unprivileged_bpf_disabled __read_mostly;

/* List of registered map implementations; filled by
 * bpf_register_map_type() and searched by find_and_alloc_map().
 */
static LIST_HEAD(bpf_map_types);
  27
  28 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
  29 {
  30         struct bpf_map_type_list *tl;
  31         struct bpf_map *map;
  32
  33         list_for_each_entry(tl, &bpf_map_types, list_node) {
  34                 if (tl->type == attr->map_type) {
  35                         map = tl->ops->map_alloc(attr);
  36                         if (IS_ERR(map))
  37                                 return map;
  38                         map->ops = tl->ops;
  39                         map->map_type = attr->map_type;
  40                         return map;
  41                 }
  42         }
  43         return ERR_PTR(-EINVAL);
  44 }
  45
/* boot time registration of different map implementations:
 * adds @tl to the bpf_map_types list that find_and_alloc_map() searches.
 * No locking is performed here.
 */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
        list_add(&tl->list_node, &bpf_map_types);
}
  51
  52 int bpf_map_precharge_memlock(u32 pages)
  53 {
  54         struct user_struct *user = get_current_user();
  55         unsigned long memlock_limit, cur;
  56
  57         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  58         cur = atomic_long_read(&user->locked_vm);
  59         free_uid(user);
  60         if (cur + pages > memlock_limit)
  61                 return -EPERM;
  62         return 0;
  63 }
  64
  65 static int bpf_map_charge_memlock(struct bpf_map *map)
  66 {
  67         struct user_struct *user = get_current_user();
  68         unsigned long memlock_limit;
  69
  70         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  71
  72         atomic_long_add(map->pages, &user->locked_vm);
  73
  74         if (atomic_long_read(&user->locked_vm) > memlock_limit) {
  75                 atomic_long_sub(map->pages, &user->locked_vm);
  76                 free_uid(user);
  77                 return -EPERM;
  78         }
  79         map->user = user;
  80         return 0;
  81 }
  82
  83 static void bpf_map_uncharge_memlock(struct bpf_map *map)
  84 {
  85         struct user_struct *user = map->user;
  86
  87         atomic_long_sub(map->pages, &user->locked_vm);
  88         free_uid(user);
  89 }
  90
  91 /* called from workqueue */
  92 static void bpf_map_free_deferred(struct work_struct *work)
  93 {
  94         struct bpf_map *map = container_of(work, struct bpf_map, work);
  95
  96         bpf_map_uncharge_memlock(map);
  97         /* implementation dependent freeing */
  98         map->ops->map_free(map);
  99 }
 100
 101 static void bpf_map_put_uref(struct bpf_map *map)
 102 {
 103         if (atomic_dec_and_test(&map->usercnt)) {
 104                 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
 105                         bpf_fd_array_map_clear(map);
 106         }
 107 }
 108
 109 /* decrement map refcnt and schedule it for freeing via workqueue
 110  * (unrelying map implementation ops->map_free() might sleep)
 111  */
 112 void bpf_map_put(struct bpf_map *map)
 113 {
 114         if (atomic_dec_and_test(&map->refcnt)) {
 115                 INIT_WORK(&map->work, bpf_map_free_deferred);
 116                 schedule_work(&map->work);
 117         }
 118 }
 119
/* Drop one user reference (usercnt) and one regular reference (refcnt)
 * on the map. The user reference is dropped first.
 */
void bpf_map_put_with_uref(struct bpf_map *map)
{
        bpf_map_put_uref(map);
        bpf_map_put(map);
}
 125
 126 static int bpf_map_release(struct inode *inode, struct file *filp)
 127 {
 128         struct bpf_map *map = filp->private_data;
 129
 130         if (map->ops->map_release)
 131                 map->ops->map_release(map, filp);
 132
 133         bpf_map_put_with_uref(map);
 134         return 0;
 135 }
 136
 137 #ifdef CONFIG_PROC_FS
 138 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 139 {
 140         const struct bpf_map *map = filp->private_data;
 141         const struct bpf_array *array;
 142         u32 owner_prog_type = 0;
 143
 144         if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 145                 array = container_of(map, struct bpf_array, map);
 146                 owner_prog_type = array->owner_prog_type;
 147         }
 148
 149         seq_printf(m,
 150                    "map_type:\t%u\n"
 151                    "key_size:\t%u\n"
 152                    "value_size:\t%u\n"
 153                    "max_entries:\t%u\n"
 154                    "map_flags:\t%#x\n"
 155                    "memlock:\t%llu\n",
 156                    map->map_type,
 157                    map->key_size,
 158                    map->value_size,
 159                    map->max_entries,
 160                    map->map_flags,
 161                    map->pages * 1ULL << PAGE_SHIFT);
 162
 163         if (owner_prog_type)
 164                 seq_printf(m, "owner_prog_type:\t%u\n",
 165                            owner_prog_type);
 166 }
 167 #endif
 168
/* File operations installed on bpf-map fds by bpf_map_new_fd().
 * __bpf_map_get() compares a file's f_op against this table to decide
 * whether the file is a bpf map.
 */
static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_map_show_fdinfo,
#endif
        .release        = bpf_map_release,
};
 175
 176 int bpf_map_new_fd(struct bpf_map *map)
 177 {
 178         return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
 179                                 O_RDWR | O_CLOEXEC);
 180 }
 181
/* helper macro to check that the unused fields of 'union bpf_attr' are zero.
 *
 * Each command defines CMD##_LAST_FIELD as the last member of the union it
 * uses. The macro scans every byte of *attr that follows that member with
 * memchr_inv() and evaluates to true when any of those bytes is non-zero.
 * It expects a pointer named 'attr' to be in scope at the point of use.
 */
#define CHECK_ATTR(CMD) \
        memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
                   sizeof(attr->CMD##_LAST_FIELD), 0, \
                   sizeof(*attr) - \
                   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
                   sizeof(attr->CMD##_LAST_FIELD)) != NULL
 189
 190 #define BPF_MAP_CREATE_LAST_FIELD map_flags
 191 /* called via syscall */
 192 static int map_create(union bpf_attr *attr)
 193 {
 194         struct bpf_map *map;
 195         int err;
 196
 197         err = CHECK_ATTR(BPF_MAP_CREATE);
 198         if (err)
 199                 return -EINVAL;
 200
 201         /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 202         map = find_and_alloc_map(attr);
 203         if (IS_ERR(map))
 204                 return PTR_ERR(map);
 205
 206         atomic_set(&map->refcnt, 1);
 207         atomic_set(&map->usercnt, 1);
 208
 209         err = bpf_map_charge_memlock(map);
 210         if (err)
 211                 goto free_map_nouncharge;
 212
 213         err = bpf_map_new_fd(map);
 214         if (err < 0)
 215                 /* failed to allocate fd */
 216                 goto free_map;
 217
 218         return err;
 219
 220 free_map:
 221         bpf_map_uncharge_memlock(map);
 222 free_map_nouncharge:
 223         map->ops->map_free(map);
 224         return err;
 225 }
 226
 227 /* if error is returned, fd is released.
 228  * On success caller should complete fd access with matching fdput()
 229  */
 230 struct bpf_map *__bpf_map_get(struct fd f)
 231 {
 232         if (!f.file)
 233                 return ERR_PTR(-EBADF);
 234         if (f.file->f_op != &bpf_map_fops) {
 235                 fdput(f);
 236                 return ERR_PTR(-EINVAL);
 237         }
 238
 239         return f.file->private_data;
 240 }
 241
 242 /* prog's and map's refcnt limit */
 243 #define BPF_MAX_REFCNT 32768
 244
 245 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 246 {
 247         if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
 248                 atomic_dec(&map->refcnt);
 249                 return ERR_PTR(-EBUSY);
 250         }
 251         if (uref)
 252                 atomic_inc(&map->usercnt);
 253         return map;
 254 }
 255
 256 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 257 {
 258         struct fd f = fdget(ufd);
 259         struct bpf_map *map;
 260
 261         map = __bpf_map_get(f);
 262         if (IS_ERR(map))
 263                 return map;
 264
 265         map = bpf_map_inc(map, true);
 266         fdput(f);
 267
 268         return map;
 269 }
 270
/* Weak default used by map_lookup_elem() for BPF_MAP_TYPE_STACK_TRACE
 * maps: always returns -ENOTSUPP. Being __weak, a non-weak definition
 * linked in from elsewhere takes precedence over this one.
 */
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
        return -ENOTSUPP;
}
 275
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

/* BPF_MAP_LOOKUP_ELEM command handler.
 *
 * Copies the key from user space, looks the element up in the map
 * referenced by attr->map_fd and copies its value back to attr->value.
 *
 * Returns 0 on success, -EINVAL when unused attr fields are non-zero, the
 * error from __bpf_map_get() for a bad fd, -ENOMEM, -EFAULT on a failed
 * user copy, -ENOENT when the generic lookup finds nothing, or the error
 * returned by the per-type copy helper.
 */
static int map_lookup_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value, *ptr;
        u32 value_size;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        /* per-CPU map types exchange one value slot, rounded up to 8 bytes,
         * for every possible CPU
         */
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
                value_size = map->value_size;

        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
                err = bpf_stackmap_copy(map, key, value);
        } else {
                /* generic path: the element pointer is only dereferenced
                 * inside the RCU read-side section, so the value is copied
                 * out before unlocking
                 */
                rcu_read_lock();
                ptr = map->ops->map_lookup_elem(map, key);
                if (ptr)
                        memcpy(value, ptr, value_size);
                rcu_read_unlock();
                err = ptr ? 0 : -ENOENT;
        }

        if (err)
                goto free_value;

        err = -EFAULT;
        if (copy_to_user(uvalue, value, value_size) != 0)
                goto free_value;

        err = 0;

        /* cleanup labels unwind in reverse order of acquisition */
free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
 352
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

/* BPF_MAP_UPDATE_ELEM command handler.
 *
 * Copies key and value from user space and stores them in the map
 * referenced by attr->map_fd, passing attr->flags to the map-type
 * specific update routine.
 *
 * Returns the result of that update routine, -EINVAL when unused attr
 * fields are non-zero, the error from __bpf_map_get() for a bad fd,
 * -ENOMEM, or -EFAULT on a failed user copy.
 */
static int map_update_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
        u32 value_size;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        /* per-CPU map types exchange one value slot, rounded up to 8 bytes,
         * for every possible CPU
         */
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
                value_size = map->value_size;

        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;

        err = -EFAULT;
        if (copy_from_user(value, uvalue, value_size) != 0)
                goto free_value;

        /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
         * inside bpf map update or delete otherwise deadlocks are possible
         */
        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
                /* fd-array map types additionally receive the map's file */
                rcu_read_lock();
                err = bpf_fd_array_map_update_elem(map, f.file, key, value,
                                                   attr->flags);
                rcu_read_unlock();
        } else {
                rcu_read_lock();
                err = map->ops->map_update_elem(map, key, value, attr->flags);
                rcu_read_unlock();
        }
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();

        /* cleanup labels unwind in reverse order of acquisition */
free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
 432
 433 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
 434
 435 static int map_delete_elem(union bpf_attr *attr)
 436 {
 437         void __user *ukey = u64_to_user_ptr(attr->key);
 438         int ufd = attr->map_fd;
 439         struct bpf_map *map;
 440         struct fd f;
 441         void *key;
 442         int err;
 443
 444         if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
 445                 return -EINVAL;
 446
 447         f = fdget(ufd);
 448         map = __bpf_map_get(f);
 449         if (IS_ERR(map))
 450                 return PTR_ERR(map);
 451
 452         err = -ENOMEM;
 453         key = kmalloc(map->key_size, GFP_USER);
 454         if (!key)
 455                 goto err_put;
 456
 457         err = -EFAULT;
 458         if (copy_from_user(key, ukey, map->key_size) != 0)
 459                 goto free_key;
 460
 461         preempt_disable();
 462         __this_cpu_inc(bpf_prog_active);
 463         rcu_read_lock();
 464         err = map->ops->map_delete_elem(map, key);
 465         rcu_read_unlock();
 466         __this_cpu_dec(bpf_prog_active);
 467         preempt_enable();
 468
 469 free_key:
 470         kfree(key);
 471 err_put:
 472         fdput(f);
 473         return err;
 474 }
 475
 476 /* last field in 'union bpf_attr' used by this command */
 477 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
 478
 479 static int map_get_next_key(union bpf_attr *attr)
 480 {
 481         void __user *ukey = u64_to_user_ptr(attr->key);
 482         void __user *unext_key = u64_to_user_ptr(attr->next_key);
 483         int ufd = attr->map_fd;
 484         struct bpf_map *map;
 485         void *key, *next_key;
 486         struct fd f;
 487         int err;
 488
 489         if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
 490                 return -EINVAL;
 491
 492         f = fdget(ufd);
 493         map = __bpf_map_get(f);
 494         if (IS_ERR(map))
 495                 return PTR_ERR(map);
 496
 497         err = -ENOMEM;
 498         key = kmalloc(map->key_size, GFP_USER);
 499         if (!key)
 500                 goto err_put;
 501
 502         err = -EFAULT;
 503         if (copy_from_user(key, ukey, map->key_size) != 0)
 504                 goto free_key;
 505
 506         err = -ENOMEM;
 507         next_key = kmalloc(map->key_size, GFP_USER);
 508         if (!next_key)
 509                 goto free_key;
 510
 511         rcu_read_lock();
 512         err = map->ops->map_get_next_key(map, key, next_key);
 513         rcu_read_unlock();
 514         if (err)
 515                 goto free_next_key;
 516
 517         err = -EFAULT;
 518         if (copy_to_user(unext_key, next_key, map->key_size) != 0)
 519                 goto free_next_key;
 520
 521         err = 0;
 522
 523 free_next_key:
 524         kfree(next_key);
 525 free_key:
 526         kfree(key);
 527 err_put:
 528         fdput(f);
 529         return err;
 530 }
 531
 532 static LIST_HEAD(bpf_prog_types);
 533
 534 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 535 {
 536         struct bpf_prog_type_list *tl;
 537
 538         list_for_each_entry(tl, &bpf_prog_types, list_node) {
 539                 if (tl->type == type) {
 540                         prog->aux->ops = tl->ops;
 541                         prog->type = type;
 542                         return 0;
 543                 }
 544         }
 545
 546         return -EINVAL;
 547 }
 548
/* Register a program type: adds @tl to the bpf_prog_types list that
 * find_prog_type() searches. No locking is performed here.
 */
void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
        list_add(&tl->list_node, &bpf_prog_types);
}
 553
 554 /* fixup insn->imm field of bpf_call instructions:
 555  * if (insn->imm == BPF_FUNC_map_lookup_elem)
 556  *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 557  * else if (insn->imm == BPF_FUNC_map_update_elem)
 558  *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 559  * else ...
 560  *
 561  * this function is called after eBPF program passed verification
 562  */
 563 static void fixup_bpf_calls(struct bpf_prog *prog)
 564 {
 565         const struct bpf_func_proto *fn;
 566         int i;
 567
 568         for (i = 0; i < prog->len; i++) {
 569                 struct bpf_insn *insn = &prog->insnsi[i];
 570
 571                 if (insn->code == (BPF_JMP | BPF_CALL)) {
 572                         /* we reach here when program has bpf_call instructions
 573                          * and it passed bpf_check(), means that
 574                          * ops->get_func_proto must have been supplied, check it
 575                          */
 576                         BUG_ON(!prog->aux->ops->get_func_proto);
 577
 578                         if (insn->imm == BPF_FUNC_get_route_realm)
 579                                 prog->dst_needed = 1;
 580                         if (insn->imm == BPF_FUNC_get_prandom_u32)
 581                                 bpf_user_rnd_init_once();
 582                         if (insn->imm == BPF_FUNC_xdp_adjust_head)
 583                                 prog->xdp_adjust_head = 1;
 584                         if (insn->imm == BPF_FUNC_tail_call) {
 585                                 /* mark bpf_tail_call as different opcode
 586                                  * to avoid conditional branch in
 587                                  * interpeter for every normal call
 588                                  * and to prevent accidental JITing by
 589                                  * JIT compiler that doesn't support
 590                                  * bpf_tail_call yet
 591                                  */
 592                                 insn->imm = 0;
 593                                 insn->code |= BPF_X;
 594                                 continue;
 595                         }
 596
 597                         fn = prog->aux->ops->get_func_proto(insn->imm);
 598                         /* all functions that have prototype and verifier allowed
 599                          * programs to call them, must be real in-kernel functions
 600                          */
 601                         BUG_ON(!fn->func);
 602                         insn->imm = fn->func - __bpf_call_base;
 603                 }
 604         }
 605 }
 606
 607 /* drop refcnt on maps used by eBPF program and free auxilary data */
 608 static void free_used_maps(struct bpf_prog_aux *aux)
 609 {
 610         int i;
 611
 612         for (i = 0; i < aux->used_map_cnt; i++)
 613                 bpf_map_put(aux->used_maps[i]);
 614
 615         kfree(aux->used_maps);
 616 }
 617
 618 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 619 {
 620         struct user_struct *user = get_current_user();
 621         unsigned long memlock_limit;
 622
 623         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 624
 625         atomic_long_add(prog->pages, &user->locked_vm);
 626         if (atomic_long_read(&user->locked_vm) > memlock_limit) {
 627                 atomic_long_sub(prog->pages, &user->locked_vm);
 628                 free_uid(user);
 629                 return -EPERM;
 630         }
 631         prog->aux->user = user;
 632         return 0;
 633 }
 634
 635 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 636 {
 637         struct user_struct *user = prog->aux->user;
 638
 639         atomic_long_sub(prog->pages, &user->locked_vm);
 640         free_uid(user);
 641 }
 642
 643 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 644 {
 645         struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 646
 647         free_used_maps(aux);
 648         bpf_prog_uncharge_memlock(aux->prog);
 649         bpf_prog_free(aux->prog);
 650 }
 651
 652 void bpf_prog_put(struct bpf_prog *prog)
 653 {
 654         if (atomic_dec_and_test(&prog->aux->refcnt))
 655                 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 656 }
 657 EXPORT_SYMBOL_GPL(bpf_prog_put);
 658
 659 static int bpf_prog_release(struct inode *inode, struct file *filp)
 660 {
 661         struct bpf_prog *prog = filp->private_data;
 662
 663         bpf_prog_put(prog);
 664         return 0;
 665 }
 666
 667 #ifdef CONFIG_PROC_FS
 668 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 669 {
 670         const struct bpf_prog *prog = filp->private_data;
 671         char prog_digest[sizeof(prog->digest) * 2 + 1] = { };
 672
 673         bin2hex(prog_digest, prog->digest, sizeof(prog->digest));
 674         seq_printf(m,
 675                    "prog_type:\t%u\n"
 676                    "prog_jited:\t%u\n"
 677                    "prog_digest:\t%s\n"
 678                    "memlock:\t%llu\n",
 679                    prog->type,
 680                    prog->jited,
 681                    prog_digest,
 682                    prog->pages * 1ULL << PAGE_SHIFT);
 683 }
 684 #endif
 685
/* File operations installed on bpf-prog fds by bpf_prog_new_fd().
 * ____bpf_prog_get() compares a file's f_op against this table to decide
 * whether the file is a bpf program.
 */
static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_prog_show_fdinfo,
#endif
        .release        = bpf_prog_release,
};
 692
 693 int bpf_prog_new_fd(struct bpf_prog *prog)
 694 {
 695         return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
 696                                 O_RDWR | O_CLOEXEC);
 697 }
 698
 699 static struct bpf_prog *____bpf_prog_get(struct fd f)
 700 {
 701         if (!f.file)
 702                 return ERR_PTR(-EBADF);
 703         if (f.file->f_op != &bpf_prog_fops) {
 704                 fdput(f);
 705                 return ERR_PTR(-EINVAL);
 706         }
 707
 708         return f.file->private_data;
 709 }
 710
 711 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 712 {
 713         if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
 714                 atomic_sub(i, &prog->aux->refcnt);
 715                 return ERR_PTR(-EBUSY);
 716         }
 717         return prog;
 718 }
 719 EXPORT_SYMBOL_GPL(bpf_prog_add);
 720
 721 void bpf_prog_sub(struct bpf_prog *prog, int i)
 722 {
 723         /* Only to be used for undoing previous bpf_prog_add() in some
 724          * error path. We still know that another entity in our call
 725          * path holds a reference to the program, thus atomic_sub() can
 726          * be safely used in such cases!
 727          */
 728         WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
 729 }
 730 EXPORT_SYMBOL_GPL(bpf_prog_sub);
 731
/* Take one additional reference on @prog.
 * Thin wrapper around bpf_prog_add(prog, 1); returns its result.
 */
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
        return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
 737
 738 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 739 {
 740         struct fd f = fdget(ufd);
 741         struct bpf_prog *prog;
 742
 743         prog = ____bpf_prog_get(f);
 744         if (IS_ERR(prog))
 745                 return prog;
 746         if (type && prog->type != *type) {
 747                 prog = ERR_PTR(-EINVAL);
 748                 goto out;
 749         }
 750
 751         prog = bpf_prog_inc(prog);
 752 out:
 753         fdput(f);
 754         return prog;
 755 }
 756
/* Resolve user fd @ufd to its bpf_prog and take a reference on it,
 * accepting any program type. Returns the result of __bpf_prog_get().
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
        return __bpf_prog_get(ufd, NULL);
}
 761
/* Resolve user fd @ufd to its bpf_prog and take a reference on it, but
 * only when the program's type equals @type; __bpf_prog_get() returns
 * ERR_PTR(-EINVAL) on a type mismatch.
 */
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
        return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 767
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

/* BPF_PROG_LOAD command handler.
 *
 * Validates the request, allocates a bpf_prog, copies the instructions
 * from user space, runs the verifier, fixes up helper calls, selects the
 * runtime and installs a new fd for the program.
 *
 * Returns the new fd on success or a negative errno: -EINVAL, -EFAULT,
 * -E2BIG, -EPERM, -ENOMEM, or whatever the verifier, runtime selection or
 * fd allocation reported.
 */
static int bpf_prog_load(union bpf_attr *attr)
{
        enum bpf_prog_type type = attr->prog_type;
        struct bpf_prog *prog;
        int err;
        char license[128];
        bool is_gpl;

        if (CHECK_ATTR(BPF_PROG_LOAD))
                return -EINVAL;

        /* copy eBPF program license from user space */
        if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
                              sizeof(license) - 1) < 0)
                return -EFAULT;
        /* at most sizeof(license) - 1 bytes were copied; force termination */
        license[sizeof(license) - 1] = 0;

        /* eBPF programs must be GPL compatible to use GPL-ed functions */
        is_gpl = license_is_gpl_compatible(license);

        if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
                return -E2BIG;

        /* kprobe programs must state the version of the running kernel */
        if (type == BPF_PROG_TYPE_KPROBE &&
            attr->kern_version != LINUX_VERSION_CODE)
                return -EINVAL;

        /* only socket filters may be loaded without CAP_SYS_ADMIN */
        if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        /* plain bpf_prog allocation */
        prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
        if (!prog)
                return -ENOMEM;

        err = bpf_prog_charge_memlock(prog);
        if (err)
                goto free_prog_nouncharge;

        prog->len = attr->insn_cnt;

        err = -EFAULT;
        if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
                           bpf_prog_insn_size(prog)) != 0)
                goto free_prog;

        prog->orig_prog = NULL;
        prog->jited = 0;

        atomic_set(&prog->aux->refcnt, 1);
        prog->gpl_compatible = is_gpl ? 1 : 0;

        /* find program type: socket_filter vs tracing_filter */
        err = find_prog_type(type, prog);
        if (err < 0)
                goto free_prog;

        /* run eBPF verifier */
        /* NOTE(review): prog is passed by address, so bpf_check() can
         * presumably replace it - confirm against the verifier
         */
        err = bpf_check(&prog, attr);
        if (err < 0)
                goto free_used_maps;

        /* fixup BPF_CALL->imm field */
        fixup_bpf_calls(prog);

        /* eBPF program is ready to be JITed */
        prog = bpf_prog_select_runtime(prog, &err);
        if (err < 0)
                goto free_used_maps;

        err = bpf_prog_new_fd(prog);
        if (err < 0)
                /* failed to allocate fd */
                goto free_used_maps;

        return err;

        /* error unwinding: each label undoes one more setup step */
free_used_maps:
        free_used_maps(prog->aux);
free_prog:
        bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
        bpf_prog_free(prog);
        return err;
}
 856
/* last field in 'union bpf_attr' used by BPF_OBJ_PIN and BPF_OBJ_GET */
#define BPF_OBJ_LAST_FIELD bpf_fd

/* BPF_OBJ_PIN command handler: passes attr->bpf_fd and the user-space
 * path in attr->pathname to bpf_obj_pin_user() and returns its result.
 * Returns -EINVAL when unused attr fields are non-zero.
 */
static int bpf_obj_pin(const union bpf_attr *attr)
{
        if (CHECK_ATTR(BPF_OBJ))
                return -EINVAL;

        return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}
 866
 867 static int bpf_obj_get(const union bpf_attr *attr)
 868 {
 869         if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
 870                 return -EINVAL;
 871
 872         return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
 873 }
 874
 875 #ifdef CONFIG_CGROUP_BPF
 876
 877 #define BPF_PROG_ATTACH_LAST_FIELD attach_type
 878
 879 static int bpf_prog_attach(const union bpf_attr *attr)
 880 {
 881         struct bpf_prog *prog;
 882         struct cgroup *cgrp;
 883         enum bpf_prog_type ptype;
 884
 885         if (!capable(CAP_NET_ADMIN))
 886                 return -EPERM;
 887
 888         if (CHECK_ATTR(BPF_PROG_ATTACH))
 889                 return -EINVAL;
 890
 891         switch (attr->attach_type) {
 892         case BPF_CGROUP_INET_INGRESS:
 893         case BPF_CGROUP_INET_EGRESS:
 894                 ptype = BPF_PROG_TYPE_CGROUP_SKB;
 895                 break;
 896         case BPF_CGROUP_INET_SOCK_CREATE:
 897                 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 898                 break;
 899         default:
 900                 return -EINVAL;
 901         }
 902
 903         prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
 904         if (IS_ERR(prog))
 905                 return PTR_ERR(prog);
 906
 907         cgrp = cgroup_get_from_fd(attr->target_fd);
 908         if (IS_ERR(cgrp)) {
 909                 bpf_prog_put(prog);
 910                 return PTR_ERR(cgrp);
 911         }
 912
 913         cgroup_bpf_update(cgrp, prog, attr->attach_type);
 914         cgroup_put(cgrp);
 915
 916         return 0;
 917 }
 918
 919 #define BPF_PROG_DETACH_LAST_FIELD attach_type
 920
 921 static int bpf_prog_detach(const union bpf_attr *attr)
 922 {
 923         struct cgroup *cgrp;
 924
 925         if (!capable(CAP_NET_ADMIN))
 926                 return -EPERM;
 927
 928         if (CHECK_ATTR(BPF_PROG_DETACH))
 929                 return -EINVAL;
 930
 931         switch (attr->attach_type) {
 932         case BPF_CGROUP_INET_INGRESS:
 933         case BPF_CGROUP_INET_EGRESS:
 934         case BPF_CGROUP_INET_SOCK_CREATE:
 935                 cgrp = cgroup_get_from_fd(attr->target_fd);
 936                 if (IS_ERR(cgrp))
 937                         return PTR_ERR(cgrp);
 938
 939                 cgroup_bpf_update(cgrp, NULL, attr->attach_type);
 940                 cgroup_put(cgrp);
 941                 break;
 942
 943         default:
 944                 return -EINVAL;
 945         }
 946
 947         return 0;
 948 }
 949 #endif /* CONFIG_CGROUP_BPF */
 950
/* bpf() system call entry point.
 *
 * @cmd selects the operation, @uattr points to a user-space
 * 'union bpf_attr' and @size is the number of bytes user space supplied.
 *
 * The attributes are copied into a zero-initialised kernel copy, so a
 * caller passing fewer bytes than sizeof(union bpf_attr) leaves the
 * remaining fields zero. The command is then dispatched to its handler.
 *
 * Returns the handler's result, or -EPERM, -EFAULT, -E2BIG or -EINVAL
 * (unknown command) from the checks done here.
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
        union bpf_attr attr = {};
        int err;

        if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
                return -EPERM;

        if (!access_ok(VERIFY_READ, uattr, 1))
                return -EFAULT;

        if (size > PAGE_SIZE)   /* silly large */
                return -E2BIG;

        /* If we're handed a bigger struct than we know of,
         * ensure all the unknown bits are 0 - i.e. new
         * user-space does not rely on any kernel feature
         * extensions we don't know about yet.
         */
        if (size > sizeof(attr)) {
                unsigned char __user *addr;
                unsigned char __user *end;
                unsigned char val;

                addr = (void __user *)uattr + sizeof(attr);
                end  = (void __user *)uattr + size;

                /* read the tail one byte at a time; any non-zero byte
                 * rejects the call
                 */
                for (; addr < end; addr++) {
                        err = get_user(val, addr);
                        if (err)
                                return err;
                        if (val)
                                return -E2BIG;
                }
                /* only the part this kernel understands is copied below */
                size = sizeof(attr);
        }

        /* copy attributes from user space, may be less than sizeof(bpf_attr) */
        if (copy_from_user(&attr, uattr, size) != 0)
                return -EFAULT;

        switch (cmd) {
        case BPF_MAP_CREATE:
                err = map_create(&attr);
                break;
        case BPF_MAP_LOOKUP_ELEM:
                err = map_lookup_elem(&attr);
                break;
        case BPF_MAP_UPDATE_ELEM:
                err = map_update_elem(&attr);
                break;
        case BPF_MAP_DELETE_ELEM:
                err = map_delete_elem(&attr);
                break;
        case BPF_MAP_GET_NEXT_KEY:
                err = map_get_next_key(&attr);
                break;
        case BPF_PROG_LOAD:
                err = bpf_prog_load(&attr);
                break;
        case BPF_OBJ_PIN:
                err = bpf_obj_pin(&attr);
                break;
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;

#ifdef CONFIG_CGROUP_BPF
        case BPF_PROG_ATTACH:
                err = bpf_prog_attach(&attr);
                break;
        case BPF_PROG_DETACH:
                err = bpf_prog_detach(&attr);
                break;
#endif

        default:
                err = -EINVAL;
                break;
        }

        return err;
}