bpf: add documentation for eBPF helpers (58-64)

[linux.git] / include / uapi / linux / bpf.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 4aaee0169421bbfc3c4daddb35a8c42d6f8596e2..27940a36501a04c75402547d2248b69ae22c5c44 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1404,6 +1404,56 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             Redirect the packet to the endpoint referenced by *map* at
+ *             index *key*. Depending on its type, this *map* can contain
+ *             references to net devices (for forwarding packets through other
+ *             ports), or to CPUs (for redirecting XDP frames to another CPU;
+ *             but this is only implemented for native XDP (with driver
+ *             support) as of this writing).
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             When used to redirect packets to net devices, this helper
+ *             provides a high performance increase over **bpf_redirect**\ ().
+ *             This is due to various implementation details of the underlying
+ *             mechanisms, one of which is the fact that **bpf_redirect_map**\
+ *             () tries to send packet as a "bulk" to the device.
+ *     Return
+ *             **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             Redirect the packet to the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             Add an entry to, or update a *map* referencing sockets. The
+ *             *skops* is used as a new value for the entry associated to
+ *             *key*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             If the *map* has eBPF programs (parser and verdict), those will
+ *             be inherited by the socket being added. If the socket is
+ *             already attached to eBPF programs, this results in an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
   * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
   *     Description
   *             Adjust the address pointed by *xdp_md*\ **->data_meta** by
@@ -1574,6 +1624,103 @@ union bpf_attr {
   *             be set is returned (which comes down to 0 if all bits were set
   *             as required).
   *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             socket level. If the message *msg* is allowed to pass (i.e. if
+ *             the verdict eBPF program returns **SK_PASS**), redirect it to
+ *             the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ *     Description
+ *             For socket policies, apply the verdict of the eBPF program to
+ *             the next *bytes* (number of bytes) of message *msg*.
+ *
+ *             For example, this helper can be used in the following cases:
+ *
+ *             * A single **sendmsg**\ () or **sendfile**\ () system call
+ *               contains multiple logical messages that the eBPF program is
+ *               supposed to read and for which it should apply a verdict.
+ *             * An eBPF program only cares to read the first *bytes* of a
+ *               *msg*. If the message has a large payload, then setting up
+ *               and calling the eBPF program repeatedly for all bytes, even
+ *               though the verdict is already known, would create unnecessary
+ *               overhead.
+ *
+ *             When called from within an eBPF program, the helper sets a
+ *             counter internal to the BPF infrastructure, that is used to
+ *             apply the last verdict to the next *bytes*. If *bytes* is
+ *             smaller than the current data being processed from a
+ *             **sendmsg**\ () or **sendfile**\ () system call, the first
+ *             *bytes* will be sent and the eBPF program will be re-run with
+ *             the pointer for start of data pointing to byte number *bytes*
+ *             **+ 1**. If *bytes* is larger than the current data being
+ *             processed, then the eBPF verdict will be applied to multiple
+ *             **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ *             consumed.
+ *
+ *             Note that if a socket closes with the internal counter holding
+ *             a non-zero value, this is not a problem because data is not
+ *             being buffered for *bytes* and is sent as it is received.
+ *     Return
+ *             0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ *     Description
+ *             For socket policies, prevent the execution of the verdict eBPF
+ *             program for message *msg* until *bytes* (byte number) have been
+ *             accumulated.
+ *
+ *             This can be used when one needs a specific number of bytes
+ *             before a verdict can be assigned, even if the data spans
+ *             multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ *             case would be a user calling **sendmsg**\ () repeatedly with
+ *             1-byte long message segments. Obviously, this is bad for
+ *             performance, but it is still valid. If the eBPF program needs
+ *             *bytes* bytes to validate a header, this helper can be used to
+ *             prevent the eBPF program to be called again until *bytes* have
+ *             been accumulated.
+ *     Return
+ *             0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ *     Description
+ *             For socket policies, pull in non-linear data from user space
+ *             for *msg* and set pointers *msg*\ **->data** and *msg*\
+ *             **->data_end** to *start* and *end* bytes offsets into *msg*,
+ *             respectively.
+ *
+ *             If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ *             *msg* it can only parse data that the (**data**, **data_end**)
+ *             pointers have already consumed. For **sendmsg**\ () hooks this
+ *             is likely the first scatterlist element. But for calls relying
+ *             on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ *             be the range (**0**, **0**) because the data is shared with
+ *             user space and by default the objective is to avoid allowing
+ *             user space to modify data while (or after) eBPF verdict is
+ *             being decided. This helper can be used to pull in data and to
+ *             set the start and end pointer to given values. Data will be
+ *             copied if necessary (i.e. if data was not linear and if start
+ *             and end pointers do not point to the same chunk).
+ *
+ *             A call to this helper is susceptible to change the underlaying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
   * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
   *     Description
   *             Bind the socket associated to *ctx* to the address pointed by