1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
4 * common eBPF ELF operations.
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation;
13 * version 2.1 of the License (not later!)
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this program; if not, see <http://www.gnu.org/licenses>
28 #include <asm/unistd.h>
30 #include <linux/bpf.h>
33 #include "libbpf_internal.h"
35 /* make sure libbpf doesn't use kernel-only integer typedefs */
36 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
39 * When building perf, unistd.h is overridden. __NR_bpf is
40 * required to be defined explicitly.
43 # if defined(__i386__)
45 # elif defined(__x86_64__)
47 # elif defined(__aarch64__)
49 # elif defined(__sparc__)
51 # elif defined(__s390__)
53 # elif defined(__arc__)
56 # error __NR_bpf not defined. libbpf does not support your arch.
/*
 * Convert a pointer into a __u64 value by casting it to unsigned long
 * first and then to __u64. Used to store user-space pointers in the
 * 64-bit fields of union bpf_attr.
 */
60 static inline __u64 ptr_to_u64(const void *ptr)
62 return (__u64) (unsigned long) ptr;
/*
 * Thin wrapper that issues the bpf system call (__NR_bpf) through
 * syscall() with command @cmd, attribute block @attr and a size value,
 * and returns syscall()'s result unchanged.
 */
65 static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
68 return syscall(__NR_bpf, cmd, attr, size);
/*
 * Issue BPF_PROG_LOAD through sys_bpf() with @attr and @size.
 * The call is repeated for as long as it yields a negative fd while
 * errno is EAGAIN; any other outcome ends the loop.
 */
71 static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
76 fd = sys_bpf(BPF_PROG_LOAD, attr, size);
77 } while (fd < 0 && errno == EAGAIN);
/*
 * Create a BPF map from the description in @create_attr.
 *
 * The fields of @create_attr are copied into a zeroed attribute block,
 * which is passed to sys_bpf() with BPF_MAP_CREATE. The value returned
 * by sys_bpf() is returned to the caller.
 */
82 int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
/* start from an all-zero attribute block */
86 memset(&attr, '\0', sizeof(attr));
88 attr.map_type = create_attr->map_type;
89 attr.key_size = create_attr->key_size;
90 attr.value_size = create_attr->value_size;
91 attr.max_entries = create_attr->max_entries;
92 attr.map_flags = create_attr->map_flags;
/*
 * The name is optional. At most BPF_OBJ_NAME_LEN - 1 bytes are copied;
 * presumably map_name holds BPF_OBJ_NAME_LEN bytes, so the zeroed tail
 * keeps it NUL-terminated -- confirm against the bpf_attr layout.
 */
93 if (create_attr->name)
94 memcpy(attr.map_name, create_attr->name,
95 min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
96 attr.numa_node = create_attr->numa_node;
97 attr.btf_fd = create_attr->btf_fd;
98 attr.btf_key_type_id = create_attr->btf_key_type_id;
99 attr.btf_value_type_id = create_attr->btf_value_type_id;
100 attr.map_ifindex = create_attr->map_ifindex;
/* struct_ops maps additionally carry the vmlinux BTF value type id */
101 if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
102 attr.btf_vmlinux_value_type_id =
103 create_attr->btf_vmlinux_value_type_id;
/*
 * NOTE(review): a line between the struct_ops assignment and the
 * statement below is not visible in this listing; confirm whether
 * inner_map_fd is set unconditionally or only for other map types.
 */
105 attr.inner_map_fd = create_attr->inner_map_fd;
107 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
/*
 * Create a map with a name and a NUMA node.
 *
 * Fills a zero-initialized struct bpf_create_map_attr from the
 * arguments and forwards it to bpf_create_map_xattr(), whose result is
 * returned.
 */
110 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
111 int key_size, int value_size, int max_entries,
112 __u32 map_flags, int node)
114 struct bpf_create_map_attr map_attr = {};
116 map_attr.name = name;
117 map_attr.map_type = map_type;
118 map_attr.map_flags = map_flags;
119 map_attr.key_size = key_size;
120 map_attr.value_size = value_size;
121 map_attr.max_entries = max_entries;
/*
 * Record the NUMA node and flag its presence with BPF_F_NUMA_NODE.
 * NOTE(review): any condition guarding these two assignments is not
 * visible in this listing -- presumably a check on @node; confirm.
 */
123 map_attr.numa_node = node;
124 map_attr.map_flags |= BPF_F_NUMA_NODE;
127 return bpf_create_map_xattr(&map_attr);
/*
 * Create an unnamed map.
 *
 * Copies the map type, flags, key size, value size and entry count into
 * a zero-initialized struct bpf_create_map_attr and forwards it to
 * bpf_create_map_xattr(), whose result is returned.
 */
130 int bpf_create_map(enum bpf_map_type map_type, int key_size,
131 int value_size, int max_entries, __u32 map_flags)
133 struct bpf_create_map_attr map_attr = {};
135 map_attr.map_type = map_type;
136 map_attr.map_flags = map_flags;
137 map_attr.key_size = key_size;
138 map_attr.value_size = value_size;
139 map_attr.max_entries = max_entries;
141 return bpf_create_map_xattr(&map_attr);
/*
 * Create a named map.
 *
 * Same as the unnamed variant except that @name is stored in the
 * zero-initialized struct bpf_create_map_attr before it is forwarded to
 * bpf_create_map_xattr(). The result of that call is returned.
 * NOTE(review): the tail of the parameter list (where map_flags is
 * presumably declared) is not visible in this listing.
 */
144 int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
145 int key_size, int value_size, int max_entries,
148 struct bpf_create_map_attr map_attr = {};
150 map_attr.name = name;
151 map_attr.map_type = map_type;
152 map_attr.map_flags = map_flags;
153 map_attr.key_size = key_size;
154 map_attr.value_size = value_size;
155 map_attr.max_entries = max_entries;
157 return bpf_create_map_xattr(&map_attr);
/*
 * Create a map whose values refer to an inner map, with a NUMA node.
 *
 * Builds the attribute block directly (map type, key size, inner map fd,
 * entry count, flags, name, NUMA node) and issues BPF_MAP_CREATE through
 * sys_bpf(), returning its result.
 * NOTE(review): several lines are not visible in this listing (the
 * value_size assignment and any guards around the name copy and the
 * NUMA assignments); confirm them against the full source.
 */
160 int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
161 int key_size, int inner_map_fd, int max_entries,
162 __u32 map_flags, int node)
166 memset(&attr, '\0', sizeof(attr));
168 attr.map_type = map_type;
169 attr.key_size = key_size;
171 attr.inner_map_fd = inner_map_fd;
172 attr.max_entries = max_entries;
173 attr.map_flags = map_flags;
/* copy at most BPF_OBJ_NAME_LEN - 1 bytes of the name */
175 memcpy(attr.map_name, name,
176 min(strlen(name), BPF_OBJ_NAME_LEN - 1));
/* request placement on @node and flag that numa_node is meaningful */
179 attr.map_flags |= BPF_F_NUMA_NODE;
180 attr.numa_node = node;
183 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
/*
 * Create a map-in-map by forwarding all arguments to
 * bpf_create_map_in_map_node() and returning its result.
 * NOTE(review): the final argument of the forwarded call (the node
 * value) is not visible in this listing.
 */
186 int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
187 int key_size, int inner_map_fd, int max_entries,
190 return bpf_create_map_in_map_node(map_type, name, key_size,
191 inner_map_fd, max_entries, map_flags,
/*
 * Build a re-packed copy of @cnt records taken from @orecord.
 *
 * Each record occupies @actual_rec_size bytes. In the copy, the first
 * @expected_rec_size bytes of every record are preserved and the
 * remaining (actual_rec_size - expected_rec_size) bytes are zeroed.
 * The copy lives in a malloc()ed buffer of actual_rec_size * cnt bytes.
 * Presumably the buffer is returned and must be free()d by the caller;
 * the return statements are not visible in this listing.
 */
196 alloc_zero_tailing_info(const void *orecord, __u32 cnt,
197 __u32 actual_rec_size, __u32 expected_rec_size)
/* the cast makes the multiplication happen in 64 bits */
199 __u64 info_len = (__u64)actual_rec_size * cnt;
200 void *info, *nrecord;
203 info = malloc(info_len);
207 /* zero out bytes kernel does not understand */
209 for (i = 0; i < cnt; i++) {
210 memcpy(nrecord, orecord, expected_rec_size);
211 memset(nrecord + expected_rec_size, 0,
212 actual_rec_size - expected_rec_size);
/* step both cursors one full record; arithmetic on void * is a GNU C extension */
213 orecord += actual_rec_size;
214 nrecord += actual_rec_size;
/*
 * Load a BPF program described by @load_attr with BPF_PROG_LOAD.
 *
 * @load_attr:  program description (type, instructions, license, BTF
 *              func/line info, name, flags, ...).
 * @log_buf:    optional buffer for the verifier log.
 * @log_buf_sz: size of @log_buf; buffer and size must be given together.
 *
 * Visible flow:
 *  1. validate the arguments and the requested log level;
 *  2. fill a zeroed attribute block from @load_attr;
 *  3. attempt the load;
 *  4. while errno is E2BIG, re-pack the func info or line info records
 *     with alloc_zero_tailing_info() when the record size now held in
 *     the attribute block is smaller than the one supplied, then load
 *     again;
 *  5. attach the log buffer and load once more.
 *
 * NOTE(review): the error returns, the branches between these steps and
 * the release of the re-packed buffers are not visible in this listing.
 */
220 int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
221 char *log_buf, size_t log_buf_sz)
223 void *finfo = NULL, *linfo = NULL;
/* a log buffer and its size must be supplied together */
228 if (!load_attr || !log_buf != !log_buf_sz)
231 log_level = load_attr->log_level;
/* level may not exceed (4 | 2 | 1); a non-zero level needs a buffer */
232 if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
235 memset(&attr, 0, sizeof(attr));
236 attr.prog_type = load_attr->prog_type;
237 attr.expected_attach_type = load_attr->expected_attach_type;
/* attach target fields are filled only for the types that use them */
238 if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
239 attr.attach_btf_id = load_attr->attach_btf_id;
240 } else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
241 attr.prog_type == BPF_PROG_TYPE_EXT) {
242 attr.attach_btf_id = load_attr->attach_btf_id;
243 attr.attach_prog_fd = load_attr->attach_prog_fd;
/* NOTE(review): presumably the remaining-types branch; its opener is not visible */
245 attr.prog_ifindex = load_attr->prog_ifindex;
246 attr.kern_version = load_attr->kern_version;
248 attr.insn_cnt = (__u32)load_attr->insns_cnt;
249 attr.insns = ptr_to_u64(load_attr->insns);
250 attr.license = ptr_to_u64(load_attr->license);
252 attr.log_level = log_level;
254 attr.log_buf = ptr_to_u64(log_buf);
255 attr.log_size = log_buf_sz;
257 attr.log_buf = ptr_to_u64(NULL);
/* BTF fd plus func/line info tables are passed through as given */
261 attr.prog_btf_fd = load_attr->prog_btf_fd;
262 attr.func_info_rec_size = load_attr->func_info_rec_size;
263 attr.func_info_cnt = load_attr->func_info_cnt;
264 attr.func_info = ptr_to_u64(load_attr->func_info);
265 attr.line_info_rec_size = load_attr->line_info_rec_size;
266 attr.line_info_cnt = load_attr->line_info_cnt;
267 attr.line_info = ptr_to_u64(load_attr->line_info);
/* copy at most BPF_OBJ_NAME_LEN - 1 bytes of the program name */
269 memcpy(attr.prog_name, load_attr->name,
270 min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
271 attr.prog_flags = load_attr->prog_flags;
/* first load attempt */
273 fd = sys_bpf_prog_load(&attr, sizeof(attr));
277 /* After bpf_prog_load, the kernel may modify certain attributes
278 * to give user space a hint how to deal with loading failure.
279 * Check to see whether we can make some changes and load again.
281 while (errno == E2BIG && (!finfo || !linfo)) {
282 if (!finfo && attr.func_info_cnt &&
283 attr.func_info_rec_size < load_attr->func_info_rec_size) {
284 /* try with corrected func info records */
285 finfo = alloc_zero_tailing_info(load_attr->func_info,
286 load_attr->func_info_cnt,
287 load_attr->func_info_rec_size,
288 attr.func_info_rec_size);
292 attr.func_info = ptr_to_u64(finfo);
293 attr.func_info_rec_size = load_attr->func_info_rec_size;
294 } else if (!linfo && attr.line_info_cnt &&
295 attr.line_info_rec_size <
296 load_attr->line_info_rec_size) {
/* same correction, applied to the line info records */
297 linfo = alloc_zero_tailing_info(load_attr->line_info,
298 load_attr->line_info_cnt,
299 load_attr->line_info_rec_size,
300 attr.line_info_rec_size);
304 attr.line_info = ptr_to_u64(linfo);
305 attr.line_info_rec_size = load_attr->line_info_rec_size;
/* retry with the re-packed records */
310 fd = sys_bpf_prog_load(&attr, sizeof(attr));
/* the log retry below only applies when no level was requested and a buffer exists */
316 if (log_level || !log_buf)
319 /* Try again with log */
320 attr.log_buf = ptr_to_u64(log_buf);
321 attr.log_size = log_buf_sz;
324 fd = sys_bpf_prog_load(&attr, sizeof(attr));
/*
 * Convenience wrapper around bpf_load_program_xattr().
 *
 * Fills a zeroed struct bpf_load_program_attr with the program type,
 * instructions, instruction count, license and kernel version; the
 * expected attach type is 0 and the name is NULL. The log buffer
 * arguments are forwarded as given, and the callee's result is returned.
 */
331 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
332 size_t insns_cnt, const char *license,
333 __u32 kern_version, char *log_buf,
336 struct bpf_load_program_attr load_attr;
338 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
339 load_attr.prog_type = type;
340 load_attr.expected_attach_type = 0;
341 load_attr.name = NULL;
342 load_attr.insns = insns;
343 load_attr.insns_cnt = insns_cnt;
344 load_attr.license = license;
345 load_attr.kern_version = kern_version;
347 return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
/*
 * Load a program with caller-chosen program flags and log level.
 *
 * Fills a zeroed attribute block with the program type, instructions,
 * license, log buffer, log size, log level, kernel version and
 * @prog_flags, then issues a single sys_bpf_prog_load() and returns its
 * result.
 * NOTE(review): the end of the parameter list (where log_level is
 * presumably declared) is not visible in this listing.
 */
350 int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
351 size_t insns_cnt, __u32 prog_flags, const char *license,
352 __u32 kern_version, char *log_buf, size_t log_buf_sz,
357 memset(&attr, 0, sizeof(attr));
358 attr.prog_type = type;
359 attr.insn_cnt = (__u32)insns_cnt;
360 attr.insns = ptr_to_u64(insns);
361 attr.license = ptr_to_u64(license);
362 attr.log_buf = ptr_to_u64(log_buf);
363 attr.log_size = log_buf_sz;
364 attr.log_level = log_level;
366 attr.kern_version = kern_version;
367 attr.prog_flags = prog_flags;
369 return sys_bpf_prog_load(&attr, sizeof(attr));
/*
 * Issue BPF_MAP_UPDATE_ELEM with @key and @value passed as 64-bit
 * pointer values in a zeroed attribute block; returns sys_bpf()'s result.
 * NOTE(review): the assignments that store @fd and the update flags are
 * not visible in this listing.
 */
372 int bpf_map_update_elem(int fd, const void *key, const void *value,
377 memset(&attr, 0, sizeof(attr));
379 attr.key = ptr_to_u64(key);
380 attr.value = ptr_to_u64(value);
383 return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
/*
 * Issue BPF_MAP_LOOKUP_ELEM with @key as input and @value as the
 * destination buffer, both passed as 64-bit pointer values in a zeroed
 * attribute block; returns sys_bpf()'s result.
 * NOTE(review): the assignment that stores @fd is not visible in this
 * listing.
 */
386 int bpf_map_lookup_elem(int fd, const void *key, void *value)
390 memset(&attr, 0, sizeof(attr));
392 attr.key = ptr_to_u64(key);
393 attr.value = ptr_to_u64(value);
395 return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
/*
 * Variant of the element lookup that also accepts @flags. Issues
 * BPF_MAP_LOOKUP_ELEM with @key and @value passed as 64-bit pointer
 * values in a zeroed attribute block; returns sys_bpf()'s result.
 * NOTE(review): the assignments that store @fd and @flags are not
 * visible in this listing.
 */
398 int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
402 memset(&attr, 0, sizeof(attr));
404 attr.key = ptr_to_u64(key);
405 attr.value = ptr_to_u64(value);
408 return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
/*
 * Issue BPF_MAP_LOOKUP_AND_DELETE_ELEM with @key and @value passed as
 * 64-bit pointer values in a zeroed attribute block; returns
 * sys_bpf()'s result.
 * NOTE(review): the assignment that stores @fd is not visible in this
 * listing.
 */
411 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
415 memset(&attr, 0, sizeof(attr));
417 attr.key = ptr_to_u64(key);
418 attr.value = ptr_to_u64(value);
420 return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
/*
 * Issue BPF_MAP_DELETE_ELEM for @key, passed as a 64-bit pointer value
 * in a zeroed attribute block; returns sys_bpf()'s result.
 * NOTE(review): the assignment that stores @fd is not visible in this
 * listing.
 */
423 int bpf_map_delete_elem(int fd, const void *key)
427 memset(&attr, 0, sizeof(attr));
429 attr.key = ptr_to_u64(key);
431 return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
/*
 * Issue BPF_MAP_GET_NEXT_KEY with @key as the starting point and
 * @next_key as the destination buffer, both passed as 64-bit pointer
 * values in a zeroed attribute block; returns sys_bpf()'s result.
 * NOTE(review): the assignment that stores @fd is not visible in this
 * listing.
 */
434 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
438 memset(&attr, 0, sizeof(attr));
440 attr.key = ptr_to_u64(key);
441 attr.next_key = ptr_to_u64(next_key);
443 return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
/*
 * Issue BPF_MAP_FREEZE with a zeroed attribute block and return
 * sys_bpf()'s result.
 * NOTE(review): the assignment that stores @fd is not visible in this
 * listing.
 */
446 int bpf_map_freeze(int fd)
450 memset(&attr, 0, sizeof(attr));
453 return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
/*
 * Shared implementation of the batch map commands.
 *
 * @cmd:       batch command passed to sys_bpf().
 * @fd:        map file descriptor.
 * @in_batch, @out_batch, @keys, @values: buffers handed to the kernel
 *             as 64-bit pointer values.
 * @count:     in/out element count; read before the call and
 *             overwritten with attr.batch.count right after it.
 * @opts:      optional settings; elem_flags and flags default to 0.
 *
 * @opts is checked with OPTS_VALID() first (the failure path is not
 * visible in this listing).
 */
456 static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
457 void *out_batch, void *keys, void *values,
459 const struct bpf_map_batch_opts *opts)
464 if (!OPTS_VALID(opts, bpf_map_batch_opts))
467 memset(&attr, 0, sizeof(attr));
468 attr.batch.map_fd = fd;
469 attr.batch.in_batch = ptr_to_u64(in_batch);
470 attr.batch.out_batch = ptr_to_u64(out_batch);
471 attr.batch.keys = ptr_to_u64(keys);
472 attr.batch.values = ptr_to_u64(values);
473 attr.batch.count = *count;
474 attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
475 attr.batch.flags = OPTS_GET(opts, flags, 0);
477 ret = sys_bpf(cmd, &attr, sizeof(attr));
/* report the count left in the attribute block by the call */
478 *count = attr.batch.count;
/*
 * Batch delete: forwards to bpf_map_batch_common() with
 * BPF_MAP_DELETE_BATCH. Only @keys is supplied; the in/out batch
 * cursors and the values buffer are NULL.
 */
483 int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
484 const struct bpf_map_batch_opts *opts)
486 return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
487 NULL, keys, NULL, count, opts);
/*
 * Batch lookup: forwards every argument to bpf_map_batch_common() with
 * BPF_MAP_LOOKUP_BATCH and returns its result.
 */
490 int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
491 void *values, __u32 *count,
492 const struct bpf_map_batch_opts *opts)
494 return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
495 out_batch, keys, values, count, opts);
/*
 * Batch lookup-and-delete: forwards to bpf_map_batch_common() with
 * BPF_MAP_LOOKUP_AND_DELETE_BATCH and returns its result.
 * NOTE(review): the tail of the forwarded argument list is not visible
 * in this listing.
 */
498 int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
499 void *keys, void *values, __u32 *count,
500 const struct bpf_map_batch_opts *opts)
502 return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
503 fd, in_batch, out_batch, keys, values,
/*
 * Batch update: forwards to bpf_map_batch_common() with
 * BPF_MAP_UPDATE_BATCH. @keys and @values are supplied; the in/out
 * batch cursors are NULL.
 */
507 int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
508 const struct bpf_map_batch_opts *opts)
510 return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
511 keys, values, count, opts);
/*
 * Issue BPF_OBJ_PIN with @pathname passed as a 64-bit pointer value in
 * a zeroed attribute block; returns sys_bpf()'s result.
 * NOTE(review): the assignment that stores @fd is not visible in this
 * listing.
 */
514 int bpf_obj_pin(int fd, const char *pathname)
518 memset(&attr, 0, sizeof(attr));
/* the cast drops const before the pointer is converted */
519 attr.pathname = ptr_to_u64((void *)pathname);
522 return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
/*
 * Issue BPF_OBJ_GET for @pathname, passed as a 64-bit pointer value in
 * a zeroed attribute block; returns sys_bpf()'s result.
 */
525 int bpf_obj_get(const char *pathname)
529 memset(&attr, 0, sizeof(attr));
/* the cast drops const before the pointer is converted */
530 attr.pathname = ptr_to_u64((void *)pathname);
532 return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
/*
 * Attach a program by building a bpf_prog_attach_opts object with
 * DECLARE_LIBBPF_OPTS() and forwarding to bpf_prog_attach_xattr(),
 * whose result is returned.
 * NOTE(review): the rest of the parameter list and the initializers
 * given to DECLARE_LIBBPF_OPTS() are not visible in this listing.
 */
535 int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
538 DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
542 return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
/*
 * Attach program @prog_fd to @target_fd for attach type @type.
 *
 * @opts is checked with OPTS_VALID() (the failure path is not visible
 * in this listing). Attach flags and the fd of a program to replace are
 * read from @opts and default to 0. Issues BPF_PROG_ATTACH and returns
 * sys_bpf()'s result.
 */
545 int bpf_prog_attach_xattr(int prog_fd, int target_fd,
546 enum bpf_attach_type type,
547 const struct bpf_prog_attach_opts *opts)
551 if (!OPTS_VALID(opts, bpf_prog_attach_opts))
554 memset(&attr, 0, sizeof(attr));
555 attr.target_fd = target_fd;
556 attr.attach_bpf_fd = prog_fd;
557 attr.attach_type = type;
558 attr.attach_flags = OPTS_GET(opts, flags, 0);
559 attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
561 return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
/*
 * Issue BPF_PROG_DETACH for @target_fd and attach type @type without
 * naming a specific program (attach_bpf_fd stays zero from the memset).
 * Returns sys_bpf()'s result.
 */
564 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
568 memset(&attr, 0, sizeof(attr));
569 attr.target_fd = target_fd;
570 attr.attach_type = type;
572 return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
/*
 * Issue BPF_PROG_DETACH for @target_fd and attach type @type, also
 * passing @prog_fd in attach_bpf_fd. Returns sys_bpf()'s result.
 */
575 int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
579 memset(&attr, 0, sizeof(attr));
580 attr.target_fd = target_fd;
581 attr.attach_bpf_fd = prog_fd;
582 attr.attach_type = type;
584 return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
/*
 * Query the programs attached to @target_fd for attach type @type.
 *
 * @query_flags:  flags passed to the kernel.
 * @attach_flags: receives attr.query.attach_flags after the call.
 * @prog_ids:     buffer passed to the kernel as a 64-bit pointer value.
 * @prog_cnt:     in/out; read before the call (so it must not be NULL)
 *                and overwritten with attr.query.prog_cnt afterwards.
 *
 * NOTE(review): a line before the attach_flags store is not visible in
 * this listing -- presumably a NULL check on @attach_flags; confirm.
 */
587 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
588 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
593 memset(&attr, 0, sizeof(attr));
594 attr.query.target_fd = target_fd;
595 attr.query.attach_type = type;
596 attr.query.query_flags = query_flags;
597 attr.query.prog_cnt = *prog_cnt;
598 attr.query.prog_ids = ptr_to_u64(prog_ids);
600 ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
602 *attach_flags = attr.query.attach_flags;
603 *prog_cnt = attr.query.prog_cnt;
/*
 * Run program @prog_fd through BPF_PROG_TEST_RUN.
 *
 * @repeat:   repeat count passed to the kernel.
 * @data, @size: input buffer and its length.
 * @data_out: output buffer.
 * @size_out, @retval, duration: receive data_size_out, retval and
 *            duration from the attribute block after the call.
 *
 * NOTE(review): a line precedes each of the three result stores but is
 * not visible in this listing -- presumably NULL checks on the output
 * pointers; confirm. The last parameter is not visible either.
 */
607 int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
608 void *data_out, __u32 *size_out, __u32 *retval,
614 memset(&attr, 0, sizeof(attr));
615 attr.test.prog_fd = prog_fd;
616 attr.test.data_in = ptr_to_u64(data);
617 attr.test.data_out = ptr_to_u64(data_out);
618 attr.test.data_size_in = size;
619 attr.test.repeat = repeat;
621 ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
623 *size_out = attr.test.data_size_out;
625 *retval = attr.test.retval;
627 *duration = attr.test.duration;
/*
 * Run a program through BPF_PROG_TEST_RUN using the inputs in
 * @test_attr.
 *
 * Data and context buffers, their sizes and the repeat count are copied
 * from @test_attr into a zeroed attribute block. After the call,
 * data_size_out, ctx_size_out, retval and duration are copied back into
 * @test_attr.
 */
631 int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
/* a non-zero output size without an output buffer is inconsistent */
636 if (!test_attr->data_out && test_attr->data_size_out > 0)
639 memset(&attr, 0, sizeof(attr));
640 attr.test.prog_fd = test_attr->prog_fd;
641 attr.test.data_in = ptr_to_u64(test_attr->data_in);
642 attr.test.data_out = ptr_to_u64(test_attr->data_out);
643 attr.test.data_size_in = test_attr->data_size_in;
644 attr.test.data_size_out = test_attr->data_size_out;
645 attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
646 attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
647 attr.test.ctx_size_in = test_attr->ctx_size_in;
648 attr.test.ctx_size_out = test_attr->ctx_size_out;
649 attr.test.repeat = test_attr->repeat;
651 ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
/* copy the results back to the caller's structure */
652 test_attr->data_size_out = attr.test.data_size_out;
653 test_attr->ctx_size_out = attr.test.ctx_size_out;
654 test_attr->retval = attr.test.retval;
655 test_attr->duration = attr.test.duration;
/*
 * Shared helper for the *_get_next_id() functions.
 *
 * Passes @start_id to the kernel with command @cmd and stores
 * attr.next_id in *@next_id.
 * NOTE(review): a line before the store is not visible in this listing
 * -- presumably it limits the store to the success case; confirm.
 */
659 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
664 memset(&attr, 0, sizeof(attr));
665 attr.start_id = start_id;
667 err = sys_bpf(cmd, &attr, sizeof(attr));
669 *next_id = attr.next_id;
/*
 * Get the next id after @start_id using BPF_PROG_GET_NEXT_ID; forwards
 * to bpf_obj_get_next_id() and returns its result.
 */
674 int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
676 return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
/*
 * Get the next id after @start_id using BPF_MAP_GET_NEXT_ID; forwards
 * to bpf_obj_get_next_id() and returns its result.
 */
679 int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
681 return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
/*
 * Get the next id after @start_id using BPF_BTF_GET_NEXT_ID; forwards
 * to bpf_obj_get_next_id() and returns its result.
 */
684 int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
686 return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
/*
 * Issue BPF_PROG_GET_FD_BY_ID with a zeroed attribute block and return
 * sys_bpf()'s result.
 * NOTE(review): the assignment that stores @id is not visible in this
 * listing.
 */
689 int bpf_prog_get_fd_by_id(__u32 id)
693 memset(&attr, 0, sizeof(attr));
696 return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
/*
 * Issue BPF_MAP_GET_FD_BY_ID with a zeroed attribute block and return
 * sys_bpf()'s result.
 * NOTE(review): the assignment that stores @id is not visible in this
 * listing.
 */
699 int bpf_map_get_fd_by_id(__u32 id)
703 memset(&attr, 0, sizeof(attr));
706 return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
/*
 * Issue BPF_BTF_GET_FD_BY_ID with a zeroed attribute block and return
 * sys_bpf()'s result.
 * NOTE(review): the assignment that stores @id is not visible in this
 * listing.
 */
709 int bpf_btf_get_fd_by_id(__u32 id)
713 memset(&attr, 0, sizeof(attr));
716 return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
/*
 * Issue BPF_OBJ_GET_INFO_BY_FD for the object behind @prog_fd.
 *
 * @info:     destination buffer, passed as a 64-bit pointer value.
 * @info_len: in/out; read before the call (so it must not be NULL) and
 *            overwritten with attr.info.info_len.
 *
 * NOTE(review): a line before the info_len store is not visible in this
 * listing -- presumably it limits the store to the success case.
 */
719 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
724 memset(&attr, 0, sizeof(attr));
725 attr.info.bpf_fd = prog_fd;
726 attr.info.info_len = *info_len;
727 attr.info.info = ptr_to_u64(info);
729 err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
731 *info_len = attr.info.info_len;
/*
 * Issue BPF_RAW_TRACEPOINT_OPEN with the tracepoint @name (passed as a
 * 64-bit pointer value) and @prog_fd in a zeroed attribute block;
 * returns sys_bpf()'s result.
 */
736 int bpf_raw_tracepoint_open(const char *name, int prog_fd)
740 memset(&attr, 0, sizeof(attr));
741 attr.raw_tracepoint.name = ptr_to_u64(name);
742 attr.raw_tracepoint.prog_fd = prog_fd;
744 return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
/*
 * Load @btf_size bytes of BTF data at @btf with BPF_BTF_LOAD.
 *
 * Logging is requested (level 1, using @log_buf / @log_buf_size) only
 * when do_log is set and a non-empty log buffer was supplied.
 * NOTE(review): the last parameter, the body of the final branch and
 * the return statement are not visible in this listing. Presumably a
 * failed load without logging is retried with logging on; confirm.
 */
747 int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
750 union bpf_attr attr = {};
753 attr.btf = ptr_to_u64(btf);
754 attr.btf_size = btf_size;
757 if (do_log && log_buf && log_buf_size) {
758 attr.btf_log_level = 1;
759 attr.btf_log_size = log_buf_size;
760 attr.btf_log_buf = ptr_to_u64(log_buf);
763 fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
/* load failed with logging off although a log buffer is available */
764 if (fd == -1 && !do_log && log_buf && log_buf_size) {
/*
 * Issue BPF_TASK_FD_QUERY for file descriptor @fd of process @pid.
 *
 * @flags:   query flags passed to the kernel.
 * @buf:     buffer passed to the kernel as a 64-bit pointer value.
 * @buf_len: in/out; read before the call, overwritten afterwards.
 * @prog_id, @fd_type, @probe_offset, probe_addr: receive the matching
 *           task_fd_query fields of the attribute block.
 *
 * All five output pointers are written right after the call with no
 * visible check in between, so none of them may be NULL.
 * NOTE(review): the last parameter and the return statement are not
 * visible in this listing.
 */
772 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
773 __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
776 union bpf_attr attr = {};
779 attr.task_fd_query.pid = pid;
780 attr.task_fd_query.fd = fd;
781 attr.task_fd_query.flags = flags;
782 attr.task_fd_query.buf = ptr_to_u64(buf);
783 attr.task_fd_query.buf_len = *buf_len;
785 err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
786 *buf_len = attr.task_fd_query.buf_len;
787 *prog_id = attr.task_fd_query.prog_id;
788 *fd_type = attr.task_fd_query.fd_type;
789 *probe_offset = attr.task_fd_query.probe_offset;
790 *probe_addr = attr.task_fd_query.probe_addr;