- 'ata_qc_for_each_with_internal'
- 'ax25_for_each'
- 'ax25_uid_for_each'
+ - '__bio_for_each_bvec'
+ - 'bio_for_each_bvec'
- 'bio_for_each_integrity_vec'
- '__bio_for_each_segment'
- 'bio_for_each_segment'
- 'drm_for_each_legacy_plane'
- 'drm_for_each_plane'
- 'drm_for_each_plane_mask'
+ - 'drm_for_each_privobj'
- 'drm_mm_for_each_hole'
- 'drm_mm_for_each_node'
- 'drm_mm_for_each_node_in_range'
- 'drm_mm_for_each_node_safe'
+ - 'flow_action_for_each'
- 'for_each_active_drhd_unit'
- 'for_each_active_iommu'
- 'for_each_available_child_of_node'
- 'for_each_dss_dev'
- 'for_each_efi_memory_desc'
- 'for_each_efi_memory_desc_in_map'
+ - 'for_each_element'
+ - 'for_each_element_extid'
+ - 'for_each_element_id'
- 'for_each_endpoint_of_node'
- 'for_each_evictable_lru'
- 'for_each_fib6_node_rt_rcu'
- 'for_each_net_rcu'
- 'for_each_new_connector_in_state'
- 'for_each_new_crtc_in_state'
+ - 'for_each_new_mst_mgr_in_state'
- 'for_each_new_plane_in_state'
- 'for_each_new_private_obj_in_state'
- 'for_each_node'
- 'for_each_of_pci_range'
- 'for_each_old_connector_in_state'
- 'for_each_old_crtc_in_state'
+ - 'for_each_old_mst_mgr_in_state'
- 'for_each_oldnew_connector_in_state'
- 'for_each_oldnew_crtc_in_state'
+ - 'for_each_oldnew_mst_mgr_in_state'
- 'for_each_oldnew_plane_in_state'
- 'for_each_oldnew_plane_in_state_reverse'
- 'for_each_oldnew_private_obj_in_state'
- 'for_each_sg_dma_page'
- 'for_each_sg_page'
- 'for_each_sibling_event'
+ - 'for_each_subelement'
+ - 'for_each_subelement_extid'
+ - 'for_each_subelement_id'
- '__for_each_thread'
- 'for_each_thread'
- 'for_each_zone'
- 'fwnode_for_each_child_node'
- 'fwnode_graph_for_each_endpoint'
- 'gadget_for_each_ep'
+ - 'genradix_for_each'
+ - 'genradix_for_each_from'
- 'hash_for_each'
- 'hash_for_each_possible'
- 'hash_for_each_possible_rcu'
- 'key_for_each'
- 'key_for_each_safe'
- 'klp_for_each_func'
+ - 'klp_for_each_func_safe'
+ - 'klp_for_each_func_static'
- 'klp_for_each_object'
+ - 'klp_for_each_object_safe'
+ - 'klp_for_each_object_static'
- 'kvm_for_each_memslot'
- 'kvm_for_each_vcpu'
- 'list_for_each'
- 'media_device_for_each_intf'
- 'media_device_for_each_link'
- 'media_device_for_each_pad'
+ - 'mp_bvec_for_each_page'
+ - 'mp_bvec_for_each_segment'
- 'nanddev_io_for_each_page'
- 'netdev_for_each_lower_dev'
- 'netdev_for_each_lower_private'
- 'rht_for_each_rcu'
- 'rht_for_each_rcu_from'
- '__rq_for_each_bio'
+ - 'rq_for_each_bvec'
- 'rq_for_each_segment'
- 'scsi_for_each_prot_sg'
- 'scsi_for_each_sg'
- 'v4l2_m2m_for_each_src_buf_safe'
- 'virtio_device_for_each_vq'
- 'xa_for_each'
+ - 'xa_for_each_marked'
+ - 'xa_for_each_start'
- 'xas_for_each'
- 'xas_for_each_conflict'
- 'xas_for_each_marked'
Alan Cox <root@hraefn.swansea.linux.org.uk>
Aleksey Gorelov <aleksey_gorelov@phoenix.com>
Aleksandar Markovic <aleksandar.markovic@mips.com> <aleksandar.markovic@imgtec.com>
+Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
+Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Al Viro <viro@ftp.linux.org.uk>
Al Viro <viro@zenIV.linux.org.uk>
Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
Christophe Ricard <christophe.ricard@gmail.com>
Corey Minyard <minyard@acm.org>
Damian Hobson-Garcia <dhobsong@igel.co.jp>
+Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
+Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
+Daniel Borkmann <daniel@iogearbox.net> <danborkmann@iogearbox.net>
+Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
+Daniel Borkmann <daniel@iogearbox.net> <danborkmann@googlemail.com>
+Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
David Brownell <david-b@pacbell.net>
David Woodhouse <dwmw2@shinybook.infradead.org>
Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@mips.com>
still doing productive work. As such, time spent in this subset of the
stall state is tracked separately and exported in the "full" averages.
-The ratios are tracked as recent trends over ten, sixty, and three
-hundred second windows, which gives insight into short term events as
-well as medium and long term trends. The total absolute stall time is
-tracked and exported as well, to allow detection of latency spikes
-which wouldn't necessarily make a dent in the time averages, or to
-average trends over custom time frames.
+The ratios (in %) are tracked as recent trends over ten, sixty, and
+three hundred second windows, which gives insight into short term events
+as well as medium and long term trends. The total absolute stall time
+(in us) is tracked and exported as well, to allow detection of latency
+spikes which wouldn't necessarily make a dent in the time averages,
+or to average trends over custom time frames.
Cgroup2 interface
=================
A: It's not clear yet.
BPF developers are trying to find a way to
-support bounded loops where the verifier can guarantee that
-the program terminates in less than 4096 instructions.
+support bounded loops.
+
+Q: What are the verifier limits?
+--------------------------------
+A: The only limit known to the user space is BPF_MAXINSNS (4096).
+It's the maximum number of instructions that the unprivileged bpf
+program can have. The verifier has various internal limits,
+such as the maximum number of instructions that can be explored during
+program analysis. Currently, that limit is set to 1 million,
+which essentially means that the largest program can consist
+of 1 million NOP instructions. There is a limit to the maximum number
+of subsequent branches, a limit to the number of nested bpf-to-bpf
+calls, a limit to the number of the verifier states per instruction,
+a limit to the number of maps used by the program.
+All these limits can be hit with a sufficiently complex program.
+There are also non-numerical limits that can cause the program
+to be rejected. The verifier used to recognize only pointer + constant
+expressions. Now it can recognize pointer + bounded_register.
+bpf_map_lookup_elem(key) had a requirement that 'key' must be
+a pointer to the stack. Now, 'key' can be a pointer to map value.
+The verifier is steadily getting 'smarter'. The limits are
+being removed. The only way to know that the program is going to
+be accepted by the verifier is to try to load it.
+The bpf development process guarantees that the future kernel
+versions will accept all bpf programs that were accepted by
+the earlier versions.
+
Instruction level questions
---------------------------
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+ #define BTF_KIND_VAR 14 /* Variable */
+ #define BTF_KIND_DATASEC 15 /* Section */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
If the function has variable arguments, the last parameter is encoded with
``name_off = 0`` and ``type = 0``.
+2.2.14 BTF_KIND_VAR
+~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid C identifier
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_VAR
+ * ``info.vlen``: 0
+ * ``type``: the type of the variable
+
+``btf_type`` is followed by a single ``struct btf_var`` with the
+following data::
+
+ struct btf_var {
+ __u32 linkage;
+ };
+
+``struct btf_var`` encoding:
+ * ``linkage``: currently only static variable 0, or globally allocated
+ variable in ELF sections 1
+
+Not all types of global variables are supported by LLVM at this point.
+The following is currently available:
+
+ * static variables with or without section attributes
+ * global variables with section attributes
+
+The latter is for future extraction of map key/value type id's from a
+map definition.
+
+2.2.15 BTF_KIND_DATASEC
+~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid name associated with a variable or
+ one of .data/.bss/.rodata
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_DATASEC
+ * ``info.vlen``: # of variables
+ * ``size``: total section size in bytes (0 at compilation time, patched
+ to actual size by BPF loaders such as libbpf)
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_var_secinfo``::
+
+ struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+ };
+
+``struct btf_var_secinfo`` encoding:
+ * ``type``: the type of the BTF_KIND_VAR variable
+ * ``offset``: the in-section offset of the variable
+ * ``size``: the size of the variable in bytes
+
3. BTF Kernel API
*****************
bpf_devel_QA
+Program types
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ prog_cgroup_sysctl
+ prog_flow_dissector
+
+
.. Links:
.. _Documentation/networking/filter.txt: ../networking/filter.txt
.. _man-pages: https://www.kernel.org/doc/man-pages/
--- /dev/null
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+===========================
+BPF_PROG_TYPE_CGROUP_SYSCTL
+===========================
+
+This document describes ``BPF_PROG_TYPE_CGROUP_SYSCTL`` program type that
+provides cgroup-bpf hook for sysctl.
+
+The hook has to be attached to a cgroup and will be called every time a
+process inside that cgroup tries to read from or write to sysctl knob in proc.
+
+1. Attach type
+**************
+
+``BPF_CGROUP_SYSCTL`` attach type has to be used to attach
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` program to a cgroup.
+
+2. Context
+**********
+
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` provides access to the following context from
+BPF program::
+
+ struct bpf_sysctl {
+ __u32 write;
+ __u32 file_pos;
+ };
+
+* ``write`` indicates whether sysctl value is being read (``0``) or written
+ (``1``). This field is read-only.
+
+* ``file_pos`` indicates file position sysctl is being accessed at, read
+ or written. This field is read-write. Writing to the field sets the starting
+ position in sysctl proc file ``read(2)`` will be reading from or ``write(2)``
+ will be writing to. Writing zero to the field can be used e.g. to override
+ whole sysctl value by ``bpf_sysctl_set_new_value()`` on ``write(2)`` even
+ when it's called by user space on ``file_pos > 0``. Writing non-zero
+ value to the field can be used to access part of sysctl value starting from
+ specified ``file_pos``. Not all sysctl support access with ``file_pos !=
+ 0``, e.g. writes to numeric sysctl entries must always be at file position
+ ``0``. See also ``kernel.sysctl_writes_strict`` sysctl.
+
+See `linux/bpf.h`_ for more details on how context field can be accessed.
+
+3. Return code
+**************
+
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` program must return one of the following
+return codes:
+
+* ``0`` means "reject access to sysctl";
+* ``1`` means "proceed with access".
+
+If program returns ``0`` user space will get ``-1`` from ``read(2)`` or
+``write(2)`` and ``errno`` will be set to ``EPERM``.
+
+4. Helpers
+**********
+
+Since sysctl knob is represented by a name and a value, sysctl specific BPF
+helpers focus on providing access to these properties:
+
+* ``bpf_sysctl_get_name()`` to get sysctl name as it is visible in
+ ``/proc/sys`` into a buffer provided by the BPF program;
+
+* ``bpf_sysctl_get_current_value()`` to get string value currently held by
+ sysctl into a buffer provided by the BPF program. This helper is available on
+ both ``read(2)`` from and ``write(2)`` to sysctl;
+
+* ``bpf_sysctl_get_new_value()`` to get new string value currently being
+ written to sysctl before actual write happens. This helper can be used only
+ on ``ctx->write == 1``;
+
+* ``bpf_sysctl_set_new_value()`` to override new string value currently being
+ written to sysctl before actual write happens. Sysctl value will be
+ overridden starting from the current ``ctx->file_pos``. If the whole value
+ has to be overridden BPF program can set ``file_pos`` to zero before calling
+ to the helper. This helper can be used only on ``ctx->write == 1``. New
+ string value set by the helper is treated and verified by kernel same way as
+ an equivalent string passed by user space.
+
+BPF program sees sysctl value same way as user space does in proc filesystem,
+i.e. as a string. Since many sysctl values represent an integer or a vector
+of integers, the following helpers can be used to get numeric value from the
+string:
+
+* ``bpf_strtol()`` to convert initial part of the string to long integer
+ similar to user space `strtol(3)`_;
+* ``bpf_strtoul()`` to convert initial part of the string to unsigned long
+ integer similar to user space `strtoul(3)`_;
+
+See `linux/bpf.h`_ for more details on helpers described here.
+
+5. Examples
+***********
+
+See `test_sysctl_prog.c`_ for an example of a BPF program in C that accesses
+sysctl name and value, parses string value to get vector of integers and uses
+the result to make decision whether to allow or deny access to sysctl.
+
+6. Notes
+********
+
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` is intended to be used in **trusted** root
+environment, for example to monitor sysctl usage or catch unreasonable values
+an application, running as root in a separate cgroup, is trying to set.
+
+Since ``task_dfl_cgroup(current)`` is called at ``sys_read`` / ``sys_write`` time it
+may return results different from that at ``sys_open`` time, i.e. process that
+opened sysctl file in proc filesystem may differ from process that is trying
+to read from / write to it and two such processes may run in different
+cgroups, which means ``BPF_PROG_TYPE_CGROUP_SYSCTL`` should not be used as a
+security mechanism to limit sysctl usage.
+
+As with any cgroup-bpf program additional care should be taken if an
+application running as root in a cgroup should not be allowed to
+detach/replace BPF program attached by administrator.
+
+.. Links
+.. _linux/bpf.h: ../../include/uapi/linux/bpf.h
+.. _strtol(3): http://man7.org/linux/man-pages/man3/strtol.3p.html
+.. _strtoul(3): http://man7.org/linux/man-pages/man3/strtoul.3p.html
+.. _test_sysctl_prog.c:
+ ../../tools/testing/selftests/bpf/progs/test_sysctl_prog.c
.. SPDX-License-Identifier: GPL-2.0
-==================
-BPF Flow Dissector
-==================
+============================
+BPF_PROG_TYPE_FLOW_DISSECTOR
+============================
Overview
========
- renesas,r9a06g032-smp
- rockchip,rk3036-smp
- rockchip,rk3066-smp
- - socionext,milbeaut-m10v-smp
+ - socionext,milbeaut-m10v-smp
- ste,dbx500-smp
cpu-release-addr:
Optional properties:
- phy-handle: See ethernet.txt file in the same directory.
If absent, davinci_emac driver defaults to 100/FULL.
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
- ti,davinci-rmii-en: 1 byte, 1 means use RMII
- ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM?
the boot program; should be used in cases where the MAC address assigned to
the device by the boot program is different from the "local-mac-address"
property;
-- nvmem-cells: phandle, reference to an nvmem node for the MAC address;
-- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used;
- max-speed: number, specifies maximum speed in Mbit/s supported by the device;
- max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
the maximum frame size (there's contradiction in the Devicetree
Specification).
- phy-mode: string, operation mode of the PHY interface. This is now a de-facto
standard property; supported values are:
- * "internal"
+ * "internal" (Internal means there is not a standard bus between the MAC and
+ the PHY, something proprietary is being used to embed the PHY in the MAC.)
* "mii"
* "gmii"
* "sgmii"
Optional elements: 'tsu_clk'
- clocks: Phandles to input clocks.
+Optional properties:
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
+
Optional properties for PHY child node:
- reset-gpios : Should specify the gpio for phy reset
- magic-packet : If present, indicates that the hardware supports waking
to ensure the integrated PHY is used. The absence of this property indicates
the muxers should be configured so that the external PHY is used.
+- resets: The reset-controller phandle and specifier for the PHY reset signal.
+
+- reset-names: Must be "phy" for the PHY reset signal.
+
- reset-gpios: The GPIO phandle and specifier for the PHY reset signal.
- reset-assert-us: Delay after the reset was asserted in microseconds.
interrupts = <35 IRQ_TYPE_EDGE_RISING>;
reg = <0>;
+ resets = <&rst 8>;
+ reset-names = "phy";
reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
reset-assert-us = <1000>;
reset-deassert-us = <2000>;
dictionary which is empty, and that it will always be
invalid at this place.
- 17 : bitstream version. If the first byte is 17, the next byte
- gives the bitstream version (version 1 only). If the first byte
- is not 17, the bitstream version is 0.
+ 17 : bitstream version. If the first byte is 17, and compressed
+ stream length is at least 5 bytes (length of shortest possible
+ versioned bitstream), the next byte gives the bitstream version
+ (version 1 only).
+ Otherwise, the bitstream version is 0.
18..21 : copy 0..3 literals
state = (byte - 17) = 0..3 [ copy <state> literals ]
- .. row 78
- - ``KEY_SCREEN``
+ - ``KEY_ASPECT_RATIO``
- Select screen aspect ratio
- .. row 79
- - ``KEY_ZOOM``
+ - ``KEY_FULL_SCREEN``
- Put device into zoom/full screen mode
CONFIG_DECNET_ROUTER (to be able to add/delete routes)
CONFIG_NETFILTER (will be required for the DECnet routing daemon)
- CONFIG_DECNET_ROUTE_FWMARK is optional
-
Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
that you need it, in general you won't and it can cause ifconfig to
malfunction.
Version of the software responsible for supporting/handling the
Network Controller Sideband Interface.
+
+fw.psid
+=======
+
+Unique identifier of the firmware parameter set.
+=============================================
Broadcom Starfighter 2 Ethernet switch driver
=============================================
The switch hardware block is typically interfaced using MMIO accesses and
contains a bunch of sub-blocks/registers:
-* SWITCH_CORE: common switch registers
-* SWITCH_REG: external interfaces switch register
-* SWITCH_MDIO: external MDIO bus controller (there is another one in SWITCH_CORE,
+- ``SWITCH_CORE``: common switch registers
+- ``SWITCH_REG``: external interfaces switch register
+- ``SWITCH_MDIO``: external MDIO bus controller (there is another one in SWITCH_CORE,
which is used for indirect PHY accesses)
-* SWITCH_INDIR_RW: 64-bits wide register helper block
-* SWITCH_INTRL2_0/1: Level-2 interrupt controllers
-* SWITCH_ACB: Admission control block
-* SWITCH_FCB: Fail-over control block
+- ``SWITCH_INDIR_RW``: 64-bits wide register helper block
+- ``SWITCH_INTRL2_0/1``: Level-2 interrupt controllers
+- ``SWITCH_ACB``: Admission control block
+- ``SWITCH_FCB``: Fail-over control block
Implementation details
======================
-The driver is located in drivers/net/dsa/bcm_sf2.c and is implemented as a DSA
-driver; see Documentation/networking/dsa/dsa.txt for details on the subsystem
+The driver is located in ``drivers/net/dsa/bcm_sf2.c`` and is implemented as a DSA
+driver; see ``Documentation/networking/dsa/dsa.rst`` for details on the subsystem
and what it provides.
The SF2 switch is configured to enable a Broadcom specific 4-bytes switch tag
which gets inserted by the switch for every packet forwarded to the CPU
interface, conversely, the CPU network interface should insert a similar tag for
packets entering the CPU port. The tag format is described in
-net/dsa/tag_brcm.c.
+``net/dsa/tag_brcm.c``.
Overall, the SF2 driver is a fairly regular DSA driver; there are a few
specifics covered below.
-------------------
The DSA platform device driver is probed using a specific compatible string
-provided in net/dsa/dsa.c. The reason for that is because the DSA subsystem gets
+provided in ``net/dsa/dsa.c``. The reason for that is because the DSA subsystem gets
registered as a platform device driver currently. DSA will provide the needed
device_node pointers which are then accessible by the switch driver setup
function to setup resources such as register ranges and interrupts. This
in order to properly configure them. By default, the SF2 pseudo-PHY address, and
an external switch pseudo-PHY address will both be snooping for incoming MDIO
transactions, since they are at the same address (30), resulting in some kind of
-"double" programming. Using DSA, and setting ds->phys_mii_mask accordingly, we
+"double" programming. Using DSA, and setting ``ds->phys_mii_mask`` accordingly, we
selectively divert reads and writes towards external Broadcom switches
pseudo-PHY addresses. Newer revisions of the SF2 hardware have introduced a
configurable pseudo-PHY address which circumvents the initial design limitation.
MoCA interface carrier state and properly report this to the networking stack.
The MoCA interfaces are supported using the PHY library's fixed PHY/emulated PHY
-device and the switch driver registers a fixed_link_update callback for such
+device and the switch driver registers a ``fixed_link_update`` callback for such
PHYs which reflects the link state obtained from the interrupt handler.
-Distributed Switch Architecture
-===============================
-
-Introduction
+============
+Architecture
============
-This document describes the Distributed Switch Architecture (DSA) subsystem
+This document describes the **Distributed Switch Architecture (DSA)** subsystem
design principles, limitations, interactions with other subsystems, and how to
develop drivers for this subsystem as well as a TODO for developers interested
in joining the effort.
DSA currently supports 5 different tagging protocols, and a tag-less mode as
well. The different protocols are implemented in:
-net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy)
-net/dsa/tag_dsa.c: Marvell's original DSA tag
-net/dsa/tag_edsa.c: Marvell's enhanced DSA tag
-net/dsa/tag_brcm.c: Broadcom's 4 bytes tag
-net/dsa/tag_qca.c: Qualcomm's 2 bytes tag
+- ``net/dsa/tag_trailer.c``: Marvell's 4 trailer tag mode (legacy)
+- ``net/dsa/tag_dsa.c``: Marvell's original DSA tag
+- ``net/dsa/tag_edsa.c``: Marvell's enhanced DSA tag
+- ``net/dsa/tag_brcm.c``: Broadcom's 4 bytes tag
+- ``net/dsa/tag_qca.c``: Qualcomm's 2 bytes tag
The exact format of the tag protocol is vendor specific, but in general, they
all contain something which:
the CPU/management Ethernet interface. Such a driver might occasionally need to
know whether DSA is enabled (e.g.: to enable/disable specific offload features),
but the DSA subsystem has been proven to work with industry standard drivers:
-e1000e, mv643xx_eth etc. without having to introduce modifications to these
+``e1000e``, ``mv643xx_eth`` etc. without having to introduce modifications to these
drivers. Such network devices are also often referred to as conduit network
devices since they act as a pipe between the host processor and the hardware
Ethernet switch.
When a master netdev is used with DSA, a small hook is placed in the
networking stack in order to have the DSA subsystem process the Ethernet
switch specific tagging protocol. DSA accomplishes this by registering a
-specific (and fake) Ethernet type (later becoming skb->protocol) with the
-networking stack, this is also known as a ptype or packet_type. A typical
+specific (and fake) Ethernet type (later becoming ``skb->protocol``) with the
+networking stack, this is also known as a ``ptype`` or ``packet_type``. A typical
Ethernet Frame receive sequence looks like this:
Master network device (e.g.: e1000e):
-Receive interrupt fires:
-- receive function is invoked
-- basic packet processing is done: getting length, status etc.
-- packet is prepared to be processed by the Ethernet layer by calling
- eth_type_trans
+1. Receive interrupt fires:
+
+ - receive function is invoked
+ - basic packet processing is done: getting length, status etc.
+ - packet is prepared to be processed by the Ethernet layer by calling
+ ``eth_type_trans``
+
+2. net/ethernet/eth.c::
+
+ eth_type_trans(skb, dev)
+ if (dev->dsa_ptr != NULL)
+ -> skb->protocol = ETH_P_XDSA
-net/ethernet/eth.c:
+3. drivers/net/ethernet/\*::
-eth_type_trans(skb, dev)
- if (dev->dsa_ptr != NULL)
- -> skb->protocol = ETH_P_XDSA
+ netif_receive_skb(skb)
+ -> iterate over registered packet_type
+ -> invoke handler for ETH_P_XDSA, calls dsa_switch_rcv()
-drivers/net/ethernet/*:
+4. net/dsa/dsa.c::
-netif_receive_skb(skb)
- -> iterate over registered packet_type
- -> invoke handler for ETH_P_XDSA, calls dsa_switch_rcv()
+ -> dsa_switch_rcv()
+ -> invoke switch tag specific protocol handler in 'net/dsa/tag_*.c'
-net/dsa/dsa.c:
- -> dsa_switch_rcv()
- -> invoke switch tag specific protocol handler in
- net/dsa/tag_*.c
+5. net/dsa/tag_*.c:
-net/dsa/tag_*.c:
- -> inspect and strip switch tag protocol to determine originating port
- -> locate per-port network device
- -> invoke eth_type_trans() with the DSA slave network device
- -> invoked netif_receive_skb()
+ - inspect and strip switch tag protocol to determine originating port
+ - locate per-port network device
+ - invoke ``eth_type_trans()`` with the DSA slave network device
+ - invoke ``netif_receive_skb()``
Past this point, the DSA slave network devices get delivered regular Ethernet
frames that can be processed by the networking stack.
switch tag in the Ethernet frames.
These frames are then queued for transmission using the master network device
-ndo_start_xmit() function, since they contain the appropriate switch tag, the
+``ndo_start_xmit()`` function, since they contain the appropriate switch tag, the
Ethernet switch will be able to process these incoming frames from the
management interface and delivers these frames to the physical switch port.
------------------------
Summarized, this is basically what DSA looks like from a network device
-perspective:
-
-
- |---------------------------
- | CPU network device (eth0)|
- ----------------------------
- | <tag added by switch |
- | |
- | |
- | tag added by CPU> |
- |--------------------------------------------|
- | Switch driver |
- |--------------------------------------------|
- || || ||
- |-------| |-------| |-------|
- | sw0p0 | | sw0p1 | | sw0p2 |
- |-------| |-------| |-------|
+perspective::
+
+
+ |---------------------------
+ | CPU network device (eth0)|
+ ----------------------------
+ | <tag added by switch |
+ | |
+ | |
+ | tag added by CPU> |
+ |--------------------------------------------|
+ | Switch driver |
+ |--------------------------------------------|
+ || || ||
+ |-------| |-------| |-------|
+ | sw0p0 | | sw0p1 | | sw0p2 |
+ |-------| |-------| |-------|
+
+
Slave MDIO bus
--------------
Data structures
---------------
-DSA data structures are defined in include/net/dsa.h as well as
-net/dsa/dsa_priv.h.
+DSA data structures are defined in ``include/net/dsa.h`` as well as
+``net/dsa/dsa_priv.h``:
-dsa_chip_data: platform data configuration for a given switch device, this
-structure describes a switch device's parent device, its address, as well as
-various properties of its ports: names/labels, and finally a routing table
-indication (when cascading switches)
+- ``dsa_chip_data``: platform data configuration for a given switch device,
+ this structure describes a switch device's parent device, its address, as
+ well as various properties of its ports: names/labels, and finally a routing
+ table indication (when cascading switches)
-dsa_platform_data: platform device configuration data which can reference a
-collection of dsa_chip_data structure if multiples switches are cascaded, the
-master network device this switch tree is attached to needs to be referenced
+- ``dsa_platform_data``: platform device configuration data which can reference
+ a collection of dsa_chip_data structures if multiple switches are cascaded,
+ the master network device this switch tree is attached to needs to be
+ referenced
-dsa_switch_tree: structure assigned to the master network device under
-"dsa_ptr", this structure references a dsa_platform_data structure as well as
-the tagging protocol supported by the switch tree, and which receive/transmit
-function hooks should be invoked, information about the directly attached switch
-is also provided: CPU port. Finally, a collection of dsa_switch are referenced
-to address individual switches in the tree.
+- ``dsa_switch_tree``: structure assigned to the master network device under
+ ``dsa_ptr``, this structure references a dsa_platform_data structure as well as
+ the tagging protocol supported by the switch tree, and which receive/transmit
+ function hooks should be invoked, information about the directly attached
+ switch is also provided: CPU port. Finally, a collection of dsa_switch are
+ referenced to address individual switches in the tree.
-dsa_switch: structure describing a switch device in the tree, referencing a
-dsa_switch_tree as a backpointer, slave network devices, master network device,
-and a reference to the backing dsa_switch_ops
+- ``dsa_switch``: structure describing a switch device in the tree, referencing
+ a ``dsa_switch_tree`` as a backpointer, slave network devices, master network
+ device, and a reference to the backing ``dsa_switch_ops``
-dsa_switch_ops: structure referencing function pointers, see below for a full
-description.
+- ``dsa_switch_ops``: structure referencing function pointers, see below for a
+ full description.
Design limitations
==================
-----------------------------------------
DSA currently limits the number of maximum switches within a tree to 4
-(DSA_MAX_SWITCHES), and the number of ports per switch to 12 (DSA_MAX_PORTS).
+(``DSA_MAX_SWITCHES``), and the number of ports per switch to 12 (``DSA_MAX_PORTS``).
These limits could be extended to support larger configurations should this need
arise.
DSA currently leverages the following subsystems:
-- MDIO/PHY library: drivers/net/phy/phy.c, mdio_bus.c
-- Switchdev: net/switchdev/*
+- MDIO/PHY library: ``drivers/net/phy/phy.c``, ``mdio_bus.c``
+- Switchdev: ``net/switchdev/*``
- Device Tree for various of_* functions
MDIO/PHY library
----------------
Slave network devices exposed by DSA may or may not be interfacing with PHY
-devices (struct phy_device as defined in include/linux/phy.h), but the DSA
+devices (``struct phy_device`` as defined in ``include/linux/phy.h``), but the DSA
subsystem deals with all possible combinations:
- internal PHY devices, built into the Ethernet switch hardware
- special, non-autonegotiated or non MDIO-managed PHY devices: SFPs, MoCA; a.k.a
fixed PHYs
-The PHY configuration is done by the dsa_slave_phy_setup() function and the
+The PHY configuration is done by the ``dsa_slave_phy_setup()`` function and the
logic basically looks like this:
- if Device Tree is used, the PHY device is looked up using the standard
"phy-handle" property, if found, this PHY device is created and registered
- using of_phy_connect()
+ using ``of_phy_connect()``
- if Device Tree is used, and the PHY device is "fixed", that is, conforms to
the definition of a non-MDIO managed PHY as defined in
- Documentation/devicetree/bindings/net/fixed-link.txt, the PHY is registered
+ ``Documentation/devicetree/bindings/net/fixed-link.txt``, the PHY is registered
and connected transparently using the special fixed MDIO bus driver
- finally, if the PHY is built into the switch, as is very common with
-----------
DSA features a standardized binding which is documented in
-Documentation/devicetree/bindings/net/dsa/dsa.txt. PHY/MDIO library helper
-functions such as of_get_phy_mode(), of_phy_connect() are also used to query
+``Documentation/devicetree/bindings/net/dsa/dsa.txt``. PHY/MDIO library helper
+functions such as ``of_get_phy_mode()``, ``of_phy_connect()`` are also used to query
per-port PHY specific details: interface connection, MDIO bus location etc.
Driver development
DSA switch drivers need to implement a dsa_switch_ops structure which will
contain the various members described below.
-register_switch_driver() registers this dsa_switch_ops in its internal list
-of drivers to probe for. unregister_switch_driver() does the exact opposite.
+``register_switch_driver()`` registers this dsa_switch_ops in its internal list
+of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
Unless requested differently by setting the priv_size member accordingly, DSA
does not allocate any driver private context space.
Switch configuration
--------------------
-- tag_protocol: this is to indicate what kind of tagging protocol is supported,
- should be a valid value from the dsa_tag_protocol enum
+- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
+ should be a valid value from the ``dsa_tag_protocol`` enum
-- probe: probe routine which will be invoked by the DSA platform device upon
+- ``probe``: probe routine which will be invoked by the DSA platform device upon
registration to test for the presence/absence of a switch device. For MDIO
devices, it is recommended to issue a read towards internal registers using
the switch pseudo-PHY and return whether this is a supported device. For other
buses, return a non-NULL string
-- setup: setup function for the switch, this function is responsible for setting
- up the dsa_switch_ops private structure with all it needs: register maps,
+- ``setup``: setup function for the switch, this function is responsible for setting
+ up the ``dsa_switch_ops`` private structure with all it needs: register maps,
interrupts, mutexes, locks etc.. This function is also expected to properly
configure the switch to separate all network interfaces from each other, that
is, they should be isolated by the switch hardware itself, typically by creating
PHY devices and link management
-------------------------------
-- get_phy_flags: Some switches are interfaced to various kinds of Ethernet PHYs,
+- ``get_phy_flags``: Some switches are interfaced to various kinds of Ethernet PHYs,
if the PHY library PHY driver needs to know about information it cannot obtain
on its own (e.g.: coming from switch memory mapped registers), this function
should return a 32-bits bitmask of "flags", that is private between the switch
- driver and the Ethernet PHY driver in drivers/net/phy/*.
+ driver and the Ethernet PHY driver in ``drivers/net/phy/\*``.
-- phy_read: Function invoked by the DSA slave MDIO bus when attempting to read
+- ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read
the switch port MDIO registers. If unavailable, return 0xffff for each read.
For builtin switch Ethernet PHYs, this function should allow reading the link
status, auto-negotiation results, link partner pages etc..
-- phy_write: Function invoked by the DSA slave MDIO bus when attempting to write
+- ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write
to the switch port MDIO registers. If unavailable return a negative error
code.
-- adjust_link: Function invoked by the PHY library when a slave network device
+- ``adjust_link``: Function invoked by the PHY library when a slave network device
is attached to a PHY device. This function is responsible for appropriately
configuring the switch port link parameters: speed, duplex, pause based on
- what the phy_device is providing.
+ what the ``phy_device`` is providing.
-- fixed_link_update: Function invoked by the PHY library, and specifically by
+- ``fixed_link_update``: Function invoked by the PHY library, and specifically by
the fixed PHY driver asking the switch driver for link parameters that could
not be auto-negotiated, or obtained by reading the PHY registers through MDIO.
This is particularly useful for specific kinds of hardware such as QSGMII,
Ethtool operations
------------------
-- get_strings: ethtool function used to query the driver's strings, will
+- ``get_strings``: ethtool function used to query the driver's strings, will
typically return statistics strings, private flags strings etc.
-- get_ethtool_stats: ethtool function used to query per-port statistics and
+- ``get_ethtool_stats``: ethtool function used to query per-port statistics and
return their values. DSA overlays slave network devices general statistics:
RX/TX counters from the network device, with switch driver specific statistics
per port
-- get_sset_count: ethtool function used to query the number of statistics items
+- ``get_sset_count``: ethtool function used to query the number of statistics items
-- get_wol: ethtool function used to obtain Wake-on-LAN settings per-port, this
+- ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this
function may, for certain implementations also query the master network device
Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN
-- set_wol: ethtool function used to configure Wake-on-LAN settings per-port,
+- ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port,
direct counterpart to set_wol with similar restrictions
-- set_eee: ethtool function which is used to configure a switch port EEE (Green
+- ``set_eee``: ethtool function which is used to configure a switch port EEE (Green
Ethernet) settings, can optionally invoke the PHY library to enable EEE at the
PHY level if relevant. This function should enable EEE at the switch port MAC
controller and data-processing logic
-- get_eee: ethtool function which is used to query a switch port EEE settings,
+- ``get_eee``: ethtool function which is used to query a switch port EEE settings,
this function should return the EEE state of the switch port MAC controller
and data-processing logic as well as query the PHY for its currently configured
EEE settings
-- get_eeprom_len: ethtool function returning for a given switch the EEPROM
+- ``get_eeprom_len``: ethtool function returning for a given switch the EEPROM
length/size in bytes
-- get_eeprom: ethtool function returning for a given switch the EEPROM contents
+- ``get_eeprom``: ethtool function returning for a given switch the EEPROM contents
-- set_eeprom: ethtool function writing specified data to a given switch EEPROM
+- ``set_eeprom``: ethtool function writing specified data to a given switch EEPROM
-- get_regs_len: ethtool function returning the register length for a given
+- ``get_regs_len``: ethtool function returning the register length for a given
switch
-- get_regs: ethtool function returning the Ethernet switch internal register
+- ``get_regs``: ethtool function returning the Ethernet switch internal register
contents. This function might require user-land code in ethtool to
pretty-print register values and registers
Power management
----------------
-- suspend: function invoked by the DSA platform device when the system goes to
+- ``suspend``: function invoked by the DSA platform device when the system goes to
suspend, should quiesce all Ethernet switch activities, but keep ports
participating in Wake-on-LAN active as well as additional wake-up logic if
supported
-- resume: function invoked by the DSA platform device when the system resumes,
+- ``resume``: function invoked by the DSA platform device when the system resumes,
should resume all Ethernet switch activities and re-configure the switch to be
in a fully active state
-- port_enable: function invoked by the DSA slave network device ndo_open
+- ``port_enable``: function invoked by the DSA slave network device ndo_open
function when a port is administratively brought up, this function should be
fully enabling a given switch port. DSA takes care of marking the port with
- BR_STATE_BLOCKING if the port is a bridge member, or BR_STATE_FORWARDING if it
+ ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it
was not, and propagating these changes down to the hardware
-- port_disable: function invoked by the DSA slave network device ndo_close
+- ``port_disable``: function invoked by the DSA slave network device ndo_close
function when a port is administratively brought down, this function should be
fully disabling a given switch port. DSA takes care of marking the port with
- BR_STATE_DISABLED and propagating changes to the hardware if this port is
+ ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
disabled while being a bridge member
Bridge layer
------------
-- port_bridge_join: bridge layer function invoked when a given switch port is
+- ``port_bridge_join``: bridge layer function invoked when a given switch port is
added to a bridge, this function should be doing the necessary at the switch
level to permit the joining port from being added to the relevant logical
domain for it to ingress/egress traffic with other members of the bridge.
-- port_bridge_leave: bridge layer function invoked when a given switch port is
+- ``port_bridge_leave``: bridge layer function invoked when a given switch port is
removed from a bridge, this function should be doing the necessary at the
switch level to deny the leaving port from ingress/egress traffic from the
remaining bridge members. When the port leaves the bridge, it should be aged
out at the switch hardware for the switch to (re) learn MAC addresses behind
this port.
-- port_stp_state_set: bridge layer function invoked when a given switch port STP
+- ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
hardware to forward/block/learn traffic. The switch driver is responsible for
computing a STP state change based on current and asked parameters and perform
Bridge VLAN filtering
---------------------
-- port_vlan_filtering: bridge layer function invoked when the bridge gets
+- ``port_vlan_filtering``: bridge layer function invoked when the bridge gets
configured for turning on or off VLAN filtering. If nothing specific needs to
be done at the hardware level, this callback does not need to be implemented.
When VLAN filtering is turned on, the hardware must be programmed with
accept any 802.1Q frames irrespective of their VLAN ID, and untagged frames are
allowed.
-- port_vlan_prepare: bridge layer function invoked when the bridge prepares the
+- ``port_vlan_prepare``: bridge layer function invoked when the bridge prepares the
configuration of a VLAN on the given port. If the operation is not supported
- by the hardware, this function should return -EOPNOTSUPP to inform the bridge
+ by the hardware, this function should return ``-EOPNOTSUPP`` to inform the bridge
code to fallback to a software implementation. No hardware setup must be done
in this function. See port_vlan_add for this and details.
-- port_vlan_add: bridge layer function invoked when a VLAN is configured
+- ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
(tagged or untagged) for the given switch port
-- port_vlan_del: bridge layer function invoked when a VLAN is removed from the
+- ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
given switch port
-- port_vlan_dump: bridge layer function invoked with a switchdev callback
+- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
function that the driver has to call for each VLAN the given port is a member
of. A switchdev object is used to carry the VID and bridge flags.
-- port_fdb_add: bridge layer function invoked when the bridge wants to install a
+- ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
associated with this VLAN ID. If the operation is not supported, this
- function should return -EOPNOTSUPP to inform the bridge code to fallback to
+ function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
a software implementation.
-Note: VLAN ID 0 corresponds to the port private database, which, in the context
-of DSA, would be the its port-based VLAN, used by the associated bridge device.
+.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
+ of DSA, would be its port-based VLAN, used by the associated bridge device.
-- port_fdb_del: bridge layer function invoked when the bridge wants to remove a
+- ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
Forwarding Database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
-- port_fdb_dump: bridge layer function invoked with a switchdev callback
+- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
function that the driver has to call for each MAC address known to be behind
the given port. A switchdev object is used to carry the VID and FDB info.
-- port_mdb_prepare: bridge layer function invoked when the bridge prepares the
+- ``port_mdb_prepare``: bridge layer function invoked when the bridge prepares the
installation of a multicast database entry. If the operation is not supported,
- this function should return -EOPNOTSUPP to inform the bridge code to fallback
+ this function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback
to a software implementation. No hardware setup must be done in this function.
- See port_fdb_add for this and details.
+ See ``port_fdb_add`` for this and details.
-- port_mdb_add: bridge layer function invoked when the bridge wants to install
+- ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
a multicast database entry, the switch hardware should be programmed with the
specified address in the specified VLAN ID in the forwarding database
associated with this VLAN ID.
-Note: VLAN ID 0 corresponds to the port private database, which, in the context
-of DSA, would be the its port-based VLAN, used by the associated bridge device.
+.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
+ of DSA, would be its port-based VLAN, used by the associated bridge device.
-- port_mdb_del: bridge layer function invoked when the bridge wants to remove a
+- ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
multicast database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database.
-- port_mdb_dump: bridge layer function invoked with a switchdev callback
+- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
function that the driver has to call for each MAC address known to be behind
the given port. A switchdev object is used to carry the VID and MDB info.
Other hanging fruits
--------------------
-- making the number of ports fully dynamic and not dependent on DSA_MAX_PORTS
+- making the number of ports fully dynamic and not dependent on ``DSA_MAX_PORTS``
- allowing more than one CPU/management interface:
http://comments.gmane.org/gmane.linux.network/365657
- porting more drivers from other vendors:
--- /dev/null
+===============================
+Distributed Switch Architecture
+===============================
+
+.. toctree::
+ :maxdepth: 1
+
+ dsa
+ bcm_sf2
+ lan9303
+==============================
LAN9303 Ethernet switch driver
==============================
Driver details
==============
-The driver is implemented as a DSA driver, see
-Documentation/networking/dsa/dsa.txt.
+The driver is implemented as a DSA driver, see ``Documentation/networking/dsa/dsa.rst``.
-See Documentation/devicetree/bindings/net/dsa/lan9303.txt for device tree
+See ``Documentation/devicetree/bindings/net/dsa/lan9303.txt`` for device tree
binding.
The LAN9303 can be managed both via MDIO and I2C, both supported by this driver.
netdev-FAQ
af_xdp
batman-adv
- bpf_flow_dissector
can
can_ucan_protocol
device_drivers/freescale/dpaa2/index
device_drivers/intel/i40e
device_drivers/intel/iavf
device_drivers/intel/ice
+ dsa/index
devlink-info-versions
ieee802154
kapi
minimum RTT when it is moved to a longer path (e.g., due to traffic
engineering). A longer window makes the filter more resistant to RTT
inflations such as transient congestion. The unit is seconds.
+ Possible values: 0 - 86400 (1 day)
Default: 300
tcp_moderate_rcvbuf - BOOLEAN
icmp/*:
ratelimit - INTEGER
- Limit the maximal rates for sending ICMPv6 packets.
+ Limit the maximal rates for sending ICMPv6 messages.
0 to disable any limiting,
otherwise the minimal space between responses in milliseconds.
Default: 1000
+ratemask - list of comma separated ranges
+ For ICMPv6 message types matching the ranges in the ratemask, limit
+ the sending of the message according to ratelimit parameter.
+
+ The format used for both input and output is a comma separated
+ list of ranges (e.g. "0-127,129" for ICMPv6 message type 0 to 127 and
+ 129). Writing to the file will clear all previous ranges of ICMPv6
+ message types and update the current list with the input.
+
+ Refer to: https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml
+ for numerical values of ICMPv6 message types, e.g. echo request is 128
+ and echo reply is 129.
+
+ Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
+
echo_ignore_all - BOOLEAN
If set non-zero, then the kernel will ignore all ICMP ECHO
requests sent to it over the IPv6 protocol.
(*) Check call still alive.
- u32 rxrpc_kernel_check_life(struct socket *sock,
- struct rxrpc_call *call);
+ bool rxrpc_kernel_check_life(struct socket *sock,
+ struct rxrpc_call *call,
+ u32 *_life);
void rxrpc_kernel_probe_life(struct socket *sock,
struct rxrpc_call *call);
- The first function returns a number that is updated when ACKs are received
- from the peer (notably including PING RESPONSE ACKs which we can elicit by
- sending PING ACKs to see if the call still exists on the server). The
- caller should compare the numbers of two calls to see if the call is still
- alive after waiting for a suitable interval.
+ The first function passes back in *_life a number that is updated when
+ ACKs are received from the peer (notably including PING RESPONSE ACKs
+ which we can elicit by sending PING ACKs to see if the call still exists
+ on the server). The caller should compare the numbers of two calls to see
+ if the call is still alive after waiting for a suitable interval. It also
+ returns true as long as the call hasn't yet reached the completed state.
This allows the caller to work out if the server is still contactable and
if the call is still alive on the server while waiting for the server to
ARM/NUVOTON NPCM ARCHITECTURE
M: Avi Fishman <avifishman70@gmail.com>
M: Tomer Maimon <tmaimon77@gmail.com>
+M: Tali Perry <tali.perry1@gmail.com>
R: Patrick Venture <venture@google.com>
R: Nancy Yuen <yuenn@google.com>
-R: Brendan Higgins <brendanhiggins@google.com>
+R: Benjamin Fair <benjaminfair@google.com>
L: openbmc@lists.ozlabs.org (moderated for non-subscribers)
S: Supported
F: arch/arm/mach-npcm/
F: arch/arm/boot/dts/nuvoton-npcm*
-F: include/dt-bindings/clock/nuvoton,npcm7xx-clks.h
+F: include/dt-bindings/clock/nuvoton,npcm7xx-clock.h
F: drivers/*/*npcm*
F: Documentation/devicetree/bindings/*/*npcm*
F: Documentation/devicetree/bindings/*/*/*npcm*
BROADCOM BMIPS MIPS ARCHITECTURE
M: Kevin Cernekee <cernekee@gmail.com>
M: Florian Fainelli <f.fainelli@gmail.com>
+L: bcm-kernel-feedback-list@broadcom.com
L: linux-mips@vger.kernel.org
T: git git://github.com/broadcom/stblinux.git
S: Maintained
F: Documentation/driver-api/i3c
F: drivers/i3c/
F: include/linux/i3c/
-F: include/dt-bindings/i3c/
I3C DRIVER FOR SYNOPSYS DESIGNWARE
M: Vitor Soares <vitor.soares@synopsys.com>
F: include/net/af_ieee802154.h
F: include/net/cfg802154.h
F: include/net/ieee802154_netdev.h
-F: Documentation/networking/ieee802154.txt
+F: Documentation/networking/ieee802154.rst
IFE PROTOCOL
M: Yotam Gigi <yotam.gi@gmail.com>
LED SUBSYSTEM
M: Jacek Anaszewski <jacek.anaszewski@gmail.com>
M: Pavel Machek <pavel@ucw.cz>
+R: Dan Murphy <dmurphy@ti.com>
L: linux-leds@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
S: Maintained
F: Documentation/devicetree/bindings/mfd/atmel-usart.txt
MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
-M: Woojung Huh <Woojung.Huh@microchip.com>
+M: Woojung Huh <woojung.huh@microchip.com>
M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
F: include/linux/virtio_console.h
F: include/uapi/linux/virtio_console.h
-VIRTIO CORE, NET AND BLOCK DRIVERS
+VIRTIO CORE AND NET DRIVERS
M: "Michael S. Tsirkin" <mst@redhat.com>
M: Jason Wang <jasowang@redhat.com>
L: virtualization@lists.linux-foundation.org
F: drivers/crypto/virtio/
F: mm/balloon_compaction.c
+VIRTIO BLOCK AND SCSI DRIVERS
+M: "Michael S. Tsirkin" <mst@redhat.com>
+M: Jason Wang <jasowang@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
+R: Stefan Hajnoczi <stefanha@redhat.com>
+L: virtualization@lists.linux-foundation.org
+S: Maintained
+F: drivers/block/virtio_blk.c
+F: drivers/scsi/virtio_scsi.c
+F: include/uapi/linux/virtio_blk.h
+F: include/uapi/linux/virtio_scsi.h
+F: drivers/vhost/scsi.c
+
VIRTIO CRYPTO DRIVER
M: Gonglei <arei.gonglei@huawei.com>
L: virtualization@lists.linux-foundation.org
VERSION = 5
PATCHLEVEL = 1
SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc6
NAME = Shy Crocodile
# *DOCUMENTATION*
STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
OBJDUMP = $(CROSS_COMPILE)objdump
+PAHOLE = pahole
LEX = flex
YACC = bison
AWK = awk
GCC_PLUGINS_CFLAGS :=
export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
+export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
export MAKE LEX YACC AWK INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE
export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* _ASM_ALPHA_SOCKIOS_H */
532 common getppid sys_getppid
# all other architectures have common numbers for new syscall, alpha
# is the exception.
+534 common pidfd_send_signal sys_pidfd_send_signal
+535 common io_uring_setup sys_io_uring_setup
+536 common io_uring_enter sys_io_uring_enter
+537 common io_uring_register sys_io_uring_register
*/
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
unsigned long *inside_ptregs = &(regs->r0);
- inside_ptregs -= i;
-
- BUG_ON((i + n) > 6);
+ unsigned int n = 6;
+ unsigned int i = 0;
while (n--) {
args[i++] = (*inside_ptregs);
enable-active-high;
};
+ /* TPS79501 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS79501 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
matrix_keypad: matrix_keypad0 {
compatible = "gpio-matrix-keypad";
debounce-delay-ms = <5>;
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
enable-active-high;
};
+ /* TPS79518 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS78633 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
leds {
pinctrl-names = "default";
pinctrl-0 = <&user_leds_s0>;
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
reg = <0xcc000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
reg = <0xd0000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
};
vccio_sd: LDO_REG5 {
+ regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vccio_sd";
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
- card-detect-delay = <200>;
+ broken-cd;
disable-wp; /* wp not hooked up */
pinctrl-names = "default";
pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
gpio_keys: gpio-keys {
compatible = "gpio-keys";
- #address-cells = <1>;
- #size-cells = <0>;
pinctrl-names = "default";
pinctrl-0 = <&pwr_key_l>;
compatible = "arm,cortex-a12";
reg = <0x501>;
resets = <&cru SRST_CORE1>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
compatible = "arm,cortex-a12";
reg = <0x502>;
resets = <&cru SRST_CORE2>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
compatible = "arm,cortex-a12";
reg = <0x503>;
resets = <&cru SRST_CORE3>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
clock-names = "ref", "pclk";
power-domains = <&power RK3288_PD_VIO>;
rockchip,grf = <&grf>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
gpu_opp_table: gpu-opp-table {
compatible = "operating-points-v2";
- opp@100000000 {
+ opp-100000000 {
opp-hz = /bits/ 64 <100000000>;
opp-microvolt = <950000>;
};
- opp@200000000 {
+ opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
opp-microvolt = <950000>;
};
- opp@300000000 {
+ opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
opp-microvolt = <1000000>;
};
- opp@400000000 {
+ opp-400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <1100000>;
};
- opp@500000000 {
+ opp-500000000 {
opp-hz = /bits/ 64 <500000000>;
opp-microvolt = <1200000>;
};
- opp@600000000 {
+ opp-600000000 {
opp-hz = /bits/ 64 <600000000>;
opp-microvolt = <1250000>;
};
#define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0)
#define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3)
#define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1)
-#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1)
+#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1)
#define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2)
#define PIN_PC10 74
#define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0)
gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>;
gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>;
/*
- * This chipselect is active high. Just setting the flags
- * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings,
- * it will be ignored, only the special "spi-cs-high" flag
- * really counts.
+ * It's not actually active high, but the frameworks assume
+ * the polarity of the passed-in GPIO is "normal" (active
+ * high) and then actively drive the line low to select the
+ * chip.
*/
cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
- spi-cs-high;
num-chipselects = <1>;
/*
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- args[0] = regs->ARM_ORIG_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->ARM_r0 + i, n * sizeof(args[0]));
+ args[0] = regs->ARM_ORIG_r0;
+ args++;
+
+ memcpy(args, &regs->ARM_r0 + 1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->ARM_ORIG_r0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
+ regs->ARM_ORIG_r0 = args[0];
+ args++;
+
+ memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam");
if (!np)
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
pdev = of_find_device_by_node(np);
of_node_put(np);
if (!pdev) {
pr_warn("%s: failed to find securam device!\n", __func__);
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
}
sram_pool = gen_pool_get(&pdev->dev, NULL);
return 0;
securam_fail:
+ put_device(&pdev->dev);
+securam_fail_no_ref_dev:
iounmap(pm_data.sfrbu);
pm_data.sfrbu = NULL;
return ret;
}
};
-static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64);
+static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32);
static struct iop_adma_platform_data iop13xx_adma_0_data = {
.hw_id = 0,
.pool_size = PAGE_SIZE,
.resource = iop13xx_adma_0_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_0_data,
},
};
.resource = iop13xx_adma_1_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_1_data,
},
};
.resource = iop13xx_adma_2_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_2_data,
},
};
}
};
-u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64);
+u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32);
static struct platform_device iop13xx_tpmi_0_device = {
.name = "iop-tpmi",
.id = 0,
.resource = iop13xx_tpmi_0_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
.resource = iop13xx_tpmi_1_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
.resource = iop13xx_tpmi_2_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
.resource = iop13xx_tpmi_3_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4);
}
+#ifdef CONFIG_HOTPLUG_CPU
static void m10v_cpu_die(unsigned int l_cpu)
{
gic_cpu_if_down(0);
return 1;
}
+#endif
static struct smp_operations m10v_smp_ops __initdata = {
.smp_prepare_cpus = m10v_smp_init,
.smp_boot_secondary = m10v_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = m10v_cpu_die,
.cpu_kill = m10v_cpu_kill,
+#endif
};
CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops);
static struct bgpio_pdata latch1_pdata = {
.label = LATCH1_LABEL,
+ .base = -1,
.ngpio = LATCH1_NGPIO,
};
static struct bgpio_pdata latch2_pdata = {
.label = LATCH2_LABEL,
+ .base = -1,
.ngpio = LATCH2_NGPIO,
};
if (!node)
return 0;
- if (!of_device_is_available(node))
+ if (!of_device_is_available(node)) {
+ of_node_put(node);
return 0;
+ }
pdev = of_find_device_by_node(node);
.resource = iop3xx_dma_0_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_0_data,
},
};
.resource = iop3xx_dma_1_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_1_data,
},
};
.resource = iop3xx_aau_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_aau_data,
},
};
.resource = orion_xor0_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor0_pdata,
},
};
.resource = orion_xor1_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor1_pdata,
},
};
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 1>;
+ altr,sysmgr-syscon = <&sysmgr 0x44 0>;
status = "disabled";
};
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 2>;
+ altr,sysmgr-syscon = <&sysmgr 0x48 0>;
status = "disabled";
};
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 3>;
+ altr,sysmgr-syscon = <&sysmgr 0x4c 0>;
status = "disabled";
};
snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,reset-delays-us = <0 10000 50000>;
- tx_delay = <0x25>;
- rx_delay = <0x11>;
+ tx_delay = <0x24>;
+ rx_delay = <0x18>;
status = "okay";
};
vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator {
compatible = "regulator-fixed";
- enable-active-high;
- gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
+ gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
pinctrl-names = "default";
pinctrl-0 = <&usb20_host_drv>;
regulator-name = "vcc_host1_5v";
sdmmc0 {
sdmmc0_clk: sdmmc0-clk {
- rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>;
+ rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>;
};
sdmmc0_cmd: sdmmc0-cmd {
- rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>;
};
sdmmc0_dectn: sdmmc0-dectn {
};
sdmmc0_bus1: sdmmc0-bus1 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>;
};
sdmmc0_bus4: sdmmc0-bus4 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>,
- <1 RK_PA1 1 &pcfg_pull_up_4ma>,
- <1 RK_PA2 1 &pcfg_pull_up_4ma>,
- <1 RK_PA3 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA1 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA2 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA3 1 &pcfg_pull_up_8ma>;
};
sdmmc0_gpio: sdmmc0-gpio {
rgmiim1_pins: rgmiim1-pins {
rockchip,pins =
/* mac_txclk */
- <1 RK_PB4 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB4 2 &pcfg_pull_none_8ma>,
/* mac_rxclk */
- <1 RK_PB5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB5 2 &pcfg_pull_none_4ma>,
/* mac_mdio */
- <1 RK_PC3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC3 2 &pcfg_pull_none_4ma>,
/* mac_txen */
- <1 RK_PD1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PD1 2 &pcfg_pull_none_8ma>,
/* mac_clk */
- <1 RK_PC5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC5 2 &pcfg_pull_none_4ma>,
/* mac_rxdv */
- <1 RK_PC6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC6 2 &pcfg_pull_none_4ma>,
/* mac_mdc */
- <1 RK_PC7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC7 2 &pcfg_pull_none_4ma>,
/* mac_rxd1 */
- <1 RK_PB2 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB2 2 &pcfg_pull_none_4ma>,
/* mac_rxd0 */
- <1 RK_PB3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB3 2 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <1 RK_PB0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB0 2 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <1 RK_PB1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB1 2 &pcfg_pull_none_8ma>,
/* mac_rxd3 */
- <1 RK_PB6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB6 2 &pcfg_pull_none_4ma>,
/* mac_rxd2 */
- <1 RK_PB7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB7 2 &pcfg_pull_none_4ma>,
/* mac_txd3 */
- <1 RK_PC0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC0 2 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <1 RK_PC1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC1 2 &pcfg_pull_none_8ma>,
/* mac_txclk */
- <0 RK_PB0 1 &pcfg_pull_none>,
+ <0 RK_PB0 1 &pcfg_pull_none_8ma>,
/* mac_txen */
- <0 RK_PB4 1 &pcfg_pull_none>,
+ <0 RK_PB4 1 &pcfg_pull_none_8ma>,
/* mac_clk */
- <0 RK_PD0 1 &pcfg_pull_none>,
+ <0 RK_PD0 1 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <0 RK_PC0 1 &pcfg_pull_none>,
+ <0 RK_PC0 1 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <0 RK_PC1 1 &pcfg_pull_none>,
+ <0 RK_PC1 1 &pcfg_pull_none_8ma>,
/* mac_txd3 */
- <0 RK_PC7 1 &pcfg_pull_none>,
+ <0 RK_PC7 1 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <0 RK_PC6 1 &pcfg_pull_none>;
+ <0 RK_PC6 1 &pcfg_pull_none_8ma>;
};
rmiim1_pins: rmiim1-pins {
};
&hdmi {
+ ddc-i2c-bus = <&i2c3>;
pinctrl-names = "default";
pinctrl-0 = <&hdmi_cec>;
status = "okay";
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
insn "\n" \
-"2: stlxr %w3, %w0, %2\n" \
-" cbnz %w3, 1b\n" \
+"2: stlxr %w0, %w3, %2\n" \
+" cbnz %w0, 1b\n" \
" dmb ish\n" \
"3:\n" \
" .pushsection .fixup,\"ax\"\n" \
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("mov %w0, %w4",
+ __futex_atomic_op("mov %w3, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("add %w0, %w1, %w4",
+ __futex_atomic_op("add %w3, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("orr %w0, %w1, %w4",
+ __futex_atomic_op("orr %w3, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("and %w0, %w1, %w4",
+ __futex_atomic_op("and %w3, %w1, %w4",
ret, oldval, uaddr, tmp, ~oparg);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("eor %w0, %w1, %w4",
+ __futex_atomic_op("eor %w3, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
default:
struct plt_entry get_plt_entry(u64 dst, void *pc);
bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
+static inline bool plt_entry_is_initialized(const struct plt_entry *e)
+{
+ return e->adrp || e->add || e->br;
+}
+
#endif /* __ASM_MODULE_H */
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_x0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+ args[0] = regs->orig_x0;
+ args++;
+
+ memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_x0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+ regs->orig_x0 = args[0];
+ args++;
+
+ memcpy(&regs->regs[1], args, 5 * sizeof(args[0]));
}
/*
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 424
+#define __NR_compat_syscalls 428
#endif
#define __ARCH_WANT_SYS_CLONE
__SYSCALL(__NR_futex_time64, sys_futex)
#define __NR_sched_rr_get_interval_time64 423
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
/*
* Please add new compat syscalls above this comment and update
trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
&trampoline)) {
- if (!plt_entries_equal(mod->arch.ftrace_trampoline,
- &(struct plt_entry){})) {
+ if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
}
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
- int skip;
+ int skip = 0;
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+ if (regs) {
+ if (user_mode(regs))
+ return;
+ skip = 1;
+ }
+
if (!tsk)
tsk = current;
frame.graph = 0;
#endif
- skip = !!regs;
printk("Call trace:\n");
do {
/* skip until specified stack frame */
return ret;
print_modules();
- __show_regs(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk),
end_of_stack(tsk));
+ show_regs(regs);
- if (!user_mode(regs)) {
- dump_backtrace(regs, tsk);
+ if (!user_mode(regs))
dump_instr(KERN_EMERG, regs);
- }
return ret;
}
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->a4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->b4;
- case 2:
- if (!n--)
- break;
- *args++ = regs->a6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->b6;
- case 4:
- if (!n--)
- break;
- *args++ = regs->a8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->b8;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->a4;
+ *args++ = regs->b4;
+ *args++ = regs->a6;
+ *args++ = regs->b6;
+ *args++ = regs->a8;
+ *args = regs->b8;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->a4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->b4 = *args++;
- case 2:
- if (!n--)
- break;
- regs->a6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->b6 = *args++;
- case 4:
- if (!n--)
- break;
- regs->a8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->a9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->a4 = *args++;
+ regs->b4 = *args++;
+ regs->a6 = *args++;
+ regs->b6 = *args++;
+ regs->a8 = *args++;
+ regs->a9 = *args;
}
#endif /* __ASM_C6X_SYSCALLS_H */
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- while (n > 0) {
- switch (i) {
- case 0:
- *args++ = regs->er1;
- break;
- case 1:
- *args++ = regs->er2;
- break;
- case 2:
- *args++ = regs->er3;
- break;
- case 3:
- *args++ = regs->er4;
- break;
- case 4:
- *args++ = regs->er5;
- break;
- case 5:
- *args++ = regs->er6;
- break;
- }
- i++;
- n--;
- }
+ *args++ = regs->er1;
+ *args++ = regs->er2;
+ *args++ = regs->er3;
+ *args++ = regs->er4;
+ *args++ = regs->er5;
+ *args = regs->er6;
}
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &(&regs->r00)[i], n * sizeof(args[0]));
+ memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0]));
}
#endif
}
extern void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw);
+ struct pt_regs *regs, unsigned long *args, int rw);
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+ ia64_syscall_get_set_arguments(task, regs, args, 0);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+ ia64_syscall_get_set_arguments(task, regs, args, 1);
}
static inline int syscall_get_arch(void)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_SOCKIOS_H
-#define _ASM_IA64_SOCKIOS_H
-
-/*
- * Socket-level I/O control calls.
- *
- * Based on <asm-i386/sockios.h>.
- *
- * Modified 1998, 1999
- * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
- */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* _ASM_IA64_SOCKIOS_H */
}
void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw)
+ struct pt_regs *regs, unsigned long *args, int rw)
{
struct syscall_get_set_args data = {
- .i = i,
- .n = n,
+ .i = 0,
+ .n = 6,
.args = args,
.regs = regs,
.rw = rw,
332 common pkey_free sys_pkey_free
333 common rseq sys_rseq
# 334 through 423 are reserved to sync up with other architectures
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
*args++ = microblaze_get_syscall_arg(regs, i++);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
microblaze_set_syscall_arg(regs, i++, *args++);
}
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
return ath79_sys_type;
}
-int get_c0_perfcount_int(void)
-{
- return ATH79_MISC_IRQ(5);
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
# require CONFIG_CPU_MIPS32_R2=y
CONFIG_LEGACY_BOARD_OCELOT=y
+CONFIG_FIT_IMAGE_FDT_OCELOT=y
+
+CONFIG_BRIDGE=y
+CONFIG_GENERIC_PHY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_NETDEVICES=y
+CONFIG_NET_SWITCHDEV=y
+CONFIG_NET_DSA=y
CONFIG_MSCC_OCELOT_SWITCH=y
CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y
CONFIG_MDIO_MSCC_MIIM=y
CONFIG_SPI_DW_MMIO=y
CONFIG_SPI_SPIDEV=y
+CONFIG_PINCTRL_OCELOT=y
+
CONFIG_GPIO_SYSFS=y
CONFIG_POWER_RESET=y
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
int ret;
/* O32 ABI syscall() */
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* _ASM_SOCKIOS_H */
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <linux/uaccess.h>
+#include <asm/irq_regs.h>
static struct hard_trap_info {
unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */
old_fs = get_fs();
set_fs(KERNEL_DS);
- kgdb_nmicallback(raw_smp_processor_id(), NULL);
+ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
set_fs(old_fs);
}
sd.nr = syscall;
sd.arch = syscall_get_arch();
- syscall_get_arguments(current, regs, 0, 6, args);
+ syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
sd.args[i] = args[i];
sd.instruction_pointer = KSTK_EIP(current);
subu t1, v0, __NR_O32_Linux
move a1, v0
bnez t1, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
.set pop
1: jal syscall_trace_enter
421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64
422 n32 futex_time64 sys_futex
423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 n32 pidfd_send_signal sys_pidfd_send_signal
+425 n32 io_uring_setup sys_io_uring_setup
+426 n32 io_uring_enter sys_io_uring_enter
+427 n32 io_uring_register sys_io_uring_register
327 n64 rseq sys_rseq
328 n64 io_pgetevents sys_io_pgetevents
# 329 through 423 are reserved to sync up with other architectures
+424 n64 pidfd_send_signal sys_pidfd_send_signal
+425 n64 io_uring_setup sys_io_uring_setup
+426 n64 io_uring_enter sys_io_uring_enter
+427 n64 io_uring_register sys_io_uring_register
421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 o32 futex_time64 sys_futex sys_futex
423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 o32 pidfd_send_signal sys_pidfd_send_signal
+425 o32 io_uring_setup sys_io_uring_setup
+426 o32 io_uring_enter sys_io_uring_enter
+427 o32 io_uring_register sys_io_uring_register
{
struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
struct bridge_controller *bc;
- int pin = hd->pin;
if (!hd)
return;
disable_hub_irq(d);
bc = hd->bc;
- bridge_clr(bc, b_int_enable, (1 << pin));
+ bridge_clr(bc, b_int_enable, (1 << hd->pin));
bridge_read(bc, b_wid_tflush);
}
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call (from 0 through 5). The first
+ * argument is stored in @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
#define SYSCALL_MAX_ARGS 6
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- if (n == 0)
- return;
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0]));
+ args[0] = regs->orig_r0;
+ args++;
+ memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
}
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call. The first argument gets value
+ * @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_r0 = args[0];
- args++;
- i++;
- n--;
- }
+ regs->orig_r0 = args[0];
+ args++;
- memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0]));
+ memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
}
#endif /* _ASM_NDS32_SYSCALL_H */
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args)
+ struct pt_regs *regs, unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->r4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->r5;
- case 2:
- if (!n--)
- break;
- *args++ = regs->r6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->r7;
- case 4:
- if (!n--)
- break;
- *args++ = regs->r8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->r9;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->r4;
+ *args++ = regs->r5;
+ *args++ = regs->r6;
+ *args++ = regs->r7;
+ *args++ = regs->r8;
+ *args = regs->r9;
}
static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- const unsigned long *args)
+ struct pt_regs *regs, const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->r4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->r5 = *args++;
- case 2:
- if (!n--)
- break;
- regs->r6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->r7 = *args++;
- case 4:
- if (!n--)
- break;
- regs->r8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->r9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->r4 = *args++;
+ regs->r5 = *args++;
+ regs->r6 = *args++;
+ regs->r7 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
}
#endif
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->gpr[3], 6 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
- return regs->gr[20];
+ return regs->gr[28];
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->iaoq[0] = val;
+ regs->iaoq[0] = val;
+ regs->iaoq[1] = val + 4;
}
/* Query offset/name of register from its name/offset */
}
static inline void syscall_get_arguments(struct task_struct *tsk,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- BUG_ON(i);
-
- switch (n) {
- case 6:
- args[5] = regs->gr[21];
- case 5:
- args[4] = regs->gr[22];
- case 4:
- args[3] = regs->gr[23];
- case 3:
- args[2] = regs->gr[24];
- case 2:
- args[1] = regs->gr[25];
- case 1:
- args[0] = regs->gr[26];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->gr[21];
+ args[4] = regs->gr[22];
+ args[3] = regs->gr[23];
+ args[2] = regs->gr[24];
+ args[1] = regs->gr[25];
+ args[0] = regs->gr[26];
}
static inline long syscall_get_return_value(struct task_struct *task,
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef __ARCH_PARISC_SOCKIOS__
-#define __ARCH_PARISC_SOCKIOS__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif
static int __init parisc_idle_init(void)
{
- const char *marker;
-
- /* check QEMU/SeaBIOS marker in PAGE0 */
- marker = (char *) &PAGE0->pad0;
- running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
-
if (!running_on_qemu)
cpu_idle_poll_ctrl(1);
int ret, cpunum;
struct pdc_coproc_cfg coproc_cfg;
+ /* check QEMU/SeaBIOS marker in PAGE0 */
+ running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0);
+
cpunum = smp_processor_id();
init_cpu_topology();
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
defined (CONFIG_PPC_64K_PAGES)
#define MAX_PHYSMEM_BITS 51
-#elif defined(CONFIG_SPARSEMEM)
+#elif defined(CONFIG_PPC64)
#define MAX_PHYSMEM_BITS 46
#endif
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long val, mask = -1UL;
-
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_32BIT))
mask = 0xffffffff;
#endif
while (n--) {
- if (n == 0 && i == 0)
+ if (n == 0)
val = regs->orig_gpr3;
else
- val = regs->gpr[3 + i + n];
+ val = regs->gpr[3 + n];
args[n] = val & mask;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
/* Also copy the first argument into orig_gpr3 */
- if (i == 0 && n > 0)
- regs->orig_gpr3 = args[0];
+ regs->orig_gpr3 = args[0];
}
static inline int syscall_get_arch(void)
ld r4,PACA_EXSLB+EX_DAR(r13)
std r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
bl do_slb_fault
cmpdi r3,0
bne- 1f
b fast_exception_return
1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
ld r4,_NIP(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
bl do_slb_fault
cmpdi r3,0
bne- 1f
b fast_exception_return
1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
- li r3,0
- stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr SPRN_SPRG_PGDIR, r4
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
- li r3,0
- stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr SPRN_SPRG_PGDIR, r4
#include <linux/kvm_host.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kmemleak.h>
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
static __init void kvm_free_tmp(void)
{
+ /*
+ * Inform kmemleak about the hole in the .bss section since the
+ * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
+ */
+ kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
+ ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
free_reserved_area(&kvm_tmp[kvm_tmp_index],
&kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
* can be used, r7 contains NSEC_PER_SEC.
*/
- lwz r5,WTOM_CLOCK_SEC(r9)
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
lwz r6,WTOM_CLOCK_NSEC(r9)
/* We now have our offset in r5,r6. We create a fake dependency
--- /dev/null
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_ARCH_RV32I=y
+CONFIG_SMP=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCIE_XILINX=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
+CONFIG_HVC_RISCV_SBI=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_SIFIVE_PLIC=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_RCU_TRACE is not set
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, ®s->a1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(®s->a1, args, 5 * sizeof(regs->a1));
}
static inline int syscall_get_arch(void)
*/
memblock_reserve(reg->base, vmlinux_end - reg->base);
mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
+
+ /*
+ * Remove memblock from the end of usable area to the
+ * end of region
+ */
+ if (reg->base + mem_size < end)
+ memblock_remove(reg->base + mem_size,
+ end - reg->base - mem_size);
}
}
BUG_ON(mem_size == 0);
{
unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
- if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
INITRD_START < offset + ENTRIES_EXTENDED_MAX)
offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long mask = -1UL;
+ unsigned int n = 6;
- /*
- * No arguments for this syscall, there's nothing to do.
- */
- if (!n)
- return;
-
- BUG_ON(i + n > 6);
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_31BIT))
mask = 0xffffffff;
#endif
while (n-- > 0)
- if (i + n > 0)
- args[n] = regs->gprs[2 + i + n] & mask;
- if (i == 0)
- args[0] = regs->orig_gpr2 & mask;
+ if (n > 0)
+ args[n] = regs->gprs[2 + n] & mask;
+
+ args[0] = regs->orig_gpr2 & mask;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
+
while (n-- > 0)
- if (i + n > 0)
- regs->gprs[2 + i + n] = args[n];
- if (i == 0)
- regs->orig_gpr2 = args[0];
+ if (n > 0)
+ regs->gprs[2 + n] = args[n];
+ regs->orig_gpr2 = args[0];
}
static inline int syscall_get_arch(void)
if (flags & KERNEL_FPC)
/* Save floating point control */
- asm volatile("stfpc %0" : "=m" (state->fpc));
+ asm volatile("stfpc %0" : "=Q" (state->fpc));
if (!MACHINE_HAS_VX) {
if (flags & KERNEL_VXR_V0V7) {
421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 - sys_futex
423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register sys_io_uring_register
{
u64 timer;
- asm volatile("stpt %0" : "=m" (timer));
+ asm volatile("stpt %0" : "=Q" (timer));
return timer;
}
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
- : "=m" (timer) : "m" (expires));
+ : "=Q" (timer) : "Q" (expires));
S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
S390_lowcore.last_update_timer = expires;
}
#else
" stck %1" /* Store current tod clock value */
#endif
- : "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock));
+ : "=Q" (S390_lowcore.last_update_timer),
+ "=Q" (S390_lowcore.last_update_clock));
clock = S390_lowcore.last_update_clock - clock;
timer -= S390_lowcore.last_update_timer;
struct sh_clk_ops;
-void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
+void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
{
}
-void __init plat_irq_setup(void)
+void __init __weak plat_irq_setup(void)
{
}
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- /*
- * Do this simply for now. If we need to start supporting
- * fetching arguments from arbitrary indices, this will need some
- * extra logic. Presently there are no in-tree users that depend
- * on this behaviour.
- */
- BUG_ON(i);
/* Argument pattern is: R4, R5, R6, R7, R0, R1 */
- switch (n) {
- case 6: args[5] = regs->regs[1];
- case 5: args[4] = regs->regs[0];
- case 4: args[3] = regs->regs[7];
- case 3: args[2] = regs->regs[6];
- case 2: args[1] = regs->regs[5];
- case 1: args[0] = regs->regs[4];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->regs[1];
+ args[4] = regs->regs[0];
+ args[3] = regs->regs[7];
+ args[2] = regs->regs[6];
+ args[1] = regs->regs[5];
+ args[0] = regs->regs[4];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- /* Same note as above applies */
- BUG_ON(i);
-
- switch (n) {
- case 6: regs->regs[1] = args[5];
- case 5: regs->regs[0] = args[4];
- case 4: regs->regs[7] = args[3];
- case 3: regs->regs[6] = args[2];
- case 2: regs->regs[5] = args[1];
- case 1: regs->regs[4] = args[0];
- break;
- default:
- BUG();
- }
+ regs->regs[1] = args[5];
+ regs->regs[0] = args[4];
+ regs->regs[7] = args[3];
+ regs->regs[6] = args[2];
+ regs->regs[5] = args[1];
+ regs->regs[4] = args[0];
}
static inline int syscall_get_arch(void)
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, ®s->regs[2 + i], n * sizeof(args[0]));
+ memcpy(args, ®s->regs[2], 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(®s->regs[2 + i], args, n * sizeof(args[0]));
+ memcpy(®s->regs[2], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
-#define SIOCGSTAMPNS _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
+
#endif /* __ASM_SH_SOCKIOS_H */
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
int zero_extend = 0;
unsigned int j;
+ unsigned int n = 6;
#ifdef CONFIG_SPARC64
if (test_tsk_thread_flag(task, TIF_32BIT))
#endif
for (j = 0; j < n; j++) {
- unsigned long val = regs->u_regs[UREG_I0 + i + j];
+ unsigned long val = regs->u_regs[UREG_I0 + j];
if (zero_extend)
args[j] = (u32) val;
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- unsigned int j;
+ unsigned int i;
- for (j = 0; j < n; j++)
- regs->u_regs[UREG_I0 + i + j] = args[j];
+ for (i = 0; i < 6; i++)
+ regs->u_regs[UREG_I0 + i] = args[i];
}
static inline int syscall_get_arch(void)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_SPARC_SOCKIOS_H
-#define _ASM_SPARC_SOCKIOS_H
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* !(_ASM_SPARC_SOCKIOS_H) */
-
p->npages = 0;
}
+static inline bool iommu_use_atu(struct iommu *iommu, u64 mask)
+{
+ return iommu->atu && mask > DMA_BIT_MASK(32);
+}
+
/* Interrupts must be disabled. */
static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
{
prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
while (npages != 0) {
- if (mask <= DMA_BIT_MASK(32) || !pbm->iommu->atu) {
+ if (!iommu_use_atu(pbm->iommu, mask)) {
num = pci_sun4v_iommu_map(devhandle,
HV_PCI_TSBID(0, entry),
npages,
unsigned long flags, order, first_page, npages, n;
unsigned long prot = 0;
struct iommu *iommu;
- struct atu *atu;
struct iommu_map_table *tbl;
struct page *page;
void *ret;
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
- atu = iommu->atu;
-
mask = dev->coherent_dma_mask;
- if (mask <= DMA_BIT_MASK(32) || !atu)
+ if (!iommu_use_atu(iommu, mask))
tbl = &iommu->tbl;
else
- tbl = &atu->tbl;
+ tbl = &iommu->atu->tbl;
entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
atu = iommu->atu;
devhandle = pbm->devhandle;
- if (dvma <= DMA_BIT_MASK(32)) {
+ if (!iommu_use_atu(iommu, dvma)) {
tbl = &iommu->tbl;
iotsb_num = 0; /* we don't care for legacy iommu */
} else {
npages >>= IO_PAGE_SHIFT;
mask = *dev->dma_mask;
- if (mask <= DMA_BIT_MASK(32))
+ if (!iommu_use_atu(iommu, mask))
tbl = &iommu->tbl;
else
tbl = &atu->tbl;
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
mask = *dev->dma_mask;
- if (mask <= DMA_BIT_MASK(32))
+ if (!iommu_use_atu(iommu, mask))
tbl = &iommu->tbl;
else
tbl = &atu->tbl;
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
const struct uml_pt_regs *r = ®s->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG1(r);
- case 1:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG2(r);
- case 2:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG3(r);
- case 3:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG4(r);
- case 4:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG5(r);
- case 5:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG6(r);
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ *args++ = UPT_SYSCALL_ARG1(r);
+ *args++ = UPT_SYSCALL_ARG2(r);
+ *args++ = UPT_SYSCALL_ARG3(r);
+ *args++ = UPT_SYSCALL_ARG4(r);
+ *args++ = UPT_SYSCALL_ARG5(r);
+ *args = UPT_SYSCALL_ARG6(r);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
struct uml_pt_regs *r = ®s->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- UPT_SYSCALL_ARG1(r) = *args++;
- case 1:
- if (!n--)
- break;
- UPT_SYSCALL_ARG2(r) = *args++;
- case 2:
- if (!n--)
- break;
- UPT_SYSCALL_ARG3(r) = *args++;
- case 3:
- if (!n--)
- break;
- UPT_SYSCALL_ARG4(r) = *args++;
- case 4:
- if (!n--)
- break;
- UPT_SYSCALL_ARG5(r) = *args++;
- case 5:
- if (!n--)
- break;
- UPT_SYSCALL_ARG6(r) = *args++;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ UPT_SYSCALL_ARG1(r) = *args++;
+ UPT_SYSCALL_ARG2(r) = *args++;
+ UPT_SYSCALL_ARG3(r) = *args++;
+ UPT_SYSCALL_ARG4(r) = *args++;
+ UPT_SYSCALL_ARG5(r) = *args++;
+ UPT_SYSCALL_ARG6(r) = *args;
}
/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
depends on DEBUG_FS
---help---
Expose statistics about the Change Page Attribute mechanims, which
- helps to determine the effectivness of preserving large and huge
+ helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
config ARCH_HAS_MEM_ENCRYPT
vpaddq t2,t1,t1
vmovq t1x,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
paddq t2,t1
movq t1,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include <asm/apicdef.h>
+#include <asm/nmi.h>
#include "../perf_event.h"
+static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
};
/*
- * AMD Performance Monitor K7 and later.
+ * AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+};
+
+/*
+ * AMD Performance Monitor Family 17h and later:
+ */
+static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
static u64 amd_pmu_event_map(int hw_event)
{
+ if (boot_cpu_data.x86 >= 0x17)
+ return amd_f17h_perfmon_event_map[hw_event];
+
return amd_perfmon_event_map[hw_event];
}
}
}
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT 50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+ unsigned int i;
+ u64 counter;
+
+ /*
+ * Wait for the counter to be reset if it has overflowed. This loop
+ * should exit very, very quickly, but just in case, don't wait
+ * forever...
+ */
+ for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+ rdmsrl(x86_pmu_event_addr(idx), counter);
+ if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+ break;
+
+ /* Might be in IRQ context, so can't sleep */
+ udelay(1);
+ }
+}
+
+static void amd_pmu_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ x86_pmu_disable_all();
+
+ /*
+ * This shouldn't be called from NMI context, but add a safeguard here
+ * to return, since if we're in NMI context we can't wait for an NMI
+ * to reset an overflowed counter value.
+ */
+ if (in_nmi())
+ return;
+
+ /*
+ * Check each counter for overflow and wait for it to be reset by the
+ * NMI if it has overflowed. This relies on the fact that all active
+ * counters are always enabled when this function is called and
+ * ARCH_PERFMON_EVENTSEL_INT is always set.
+ */
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ amd_pmu_wait_on_overflow(idx);
+ }
+}
+
+static void amd_pmu_disable_event(struct perf_event *event)
+{
+ x86_pmu_disable_event(event);
+
+ /*
+ * This can be called from NMI context (via x86_pmu_stop). The counter
+ * may have overflowed, but either way, we'll never see it get reset
+ * by the NMI if we're already in the NMI. And the NMI latency support
+ * below will take care of any pending NMI that might have been
+ * generated by the overflow.
+ */
+ if (in_nmi())
+ return;
+
+ amd_pmu_wait_on_overflow(event->hw.idx);
+}
+
+/*
+ * Because of NMI latency, if multiple PMC counters are active or other sources
+ * of NMIs are received, the perf NMI handler can handle one or more overflowed
+ * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
+ * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
+ * back-to-back NMI support won't be active. This PMC handler needs to take into
+ * account that this can occur, otherwise this could result in unknown NMI
+ * messages being issued. Examples of this are PMC overflow while in the NMI
+ * handler when multiple PMCs are active or PMC overflow while handling some
+ * other source of an NMI.
+ *
+ * Attempt to mitigate this by using the number of active PMCs to determine
+ * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
+ * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
+ * number of active PMCs or 2. The value of 2 is used in case an NMI does not
+ * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ */
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int active, handled;
+
+ /*
+ * Obtain the active count before calling x86_pmu_handle_irq() since
+ * it is possible that x86_pmu_handle_irq() may make a counter
+ * inactive (through x86_pmu_stop).
+ */
+ active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+
+ /* Process any counter overflows */
+ handled = x86_pmu_handle_irq(regs);
+
+ /*
+ * If a counter was handled, record the number of possible remaining
+ * NMIs that can occur.
+ */
+ if (handled) {
+ this_cpu_write(perf_nmi_counter,
+ min_t(unsigned int, 2, active));
+
+ return handled;
+ }
+
+ if (!this_cpu_read(perf_nmi_counter))
+ return NMI_DONE;
+
+ this_cpu_dec(perf_nmi_counter);
+
+ return NMI_HANDLED;
+}
+
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
+ .handle_irq = amd_pmu_handle_irq,
+ .disable_all = amd_pmu_disable_all,
.enable_all = x86_pmu_enable_all,
.enable = x86_pmu_enable_event,
- .disable = x86_pmu_disable_event,
+ .disable = amd_pmu_disable_event,
.hw_config = amd_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
cpuc->perf_ctr_virt_mask = 0;
/* Reload all events */
- x86_pmu_disable_all();
+ amd_pmu_disable_all();
x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
- x86_pmu_disable_all();
+ amd_pmu_disable_all();
x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+ if (test_bit(hwc->idx, cpuc->active_mask)) {
x86_pmu.disable(event);
+ __clear_bit(hwc->idx, cpuc->active_mask);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
apic_write(APIC_LVTPC, APIC_DM_NMI);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active_mask)) {
- /*
- * Though we deactivated the counter some cpus
- * might still deliver spurious interrupts still
- * in flight. Catch them:
- */
- if (__test_and_clear_bit(idx, cpuc->running))
- handled++;
+ if (!test_bit(idx, cpuc->active_mask))
continue;
- }
event = cpuc->events[idx];
flags &= ~PERF_SAMPLE_TIME;
if (!event->attr.exclude_kernel)
flags &= ~PERF_SAMPLE_REGS_USER;
- if (event->attr.sample_regs_user & ~PEBS_REGS)
+ if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
return flags;
}
return ret;
if (event->attr.precise_ip) {
- if (!event->attr.freq) {
+ if (!(event->attr.freq || event->attr.wakeup_events)) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event)))
cpuc->lbr_sel = NULL;
+ if (x86_pmu.flags & PMU_FL_TFA) {
+ WARN_ON_ONCE(cpuc->tfa_shadow);
+ cpuc->tfa_shadow = ~0ULL;
+ intel_set_tfa(cpuc, false);
+ }
+
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
PERF_SAMPLE_PERIOD)
-#define PEBS_REGS \
- (PERF_REG_X86_AX | \
- PERF_REG_X86_BX | \
- PERF_REG_X86_CX | \
- PERF_REG_X86_DX | \
- PERF_REG_X86_DI | \
- PERF_REG_X86_SI | \
- PERF_REG_X86_SP | \
- PERF_REG_X86_BP | \
- PERF_REG_X86_IP | \
- PERF_REG_X86_FLAGS | \
- PERF_REG_X86_R8 | \
- PERF_REG_X86_R9 | \
- PERF_REG_X86_R10 | \
- PERF_REG_X86_R11 | \
- PERF_REG_X86_R12 | \
- PERF_REG_X86_R13 | \
- PERF_REG_X86_R14 | \
- PERF_REG_X86_R15)
+#define PEBS_GP_REGS \
+ ((1ULL << PERF_REG_X86_AX) | \
+ (1ULL << PERF_REG_X86_BX) | \
+ (1ULL << PERF_REG_X86_CX) | \
+ (1ULL << PERF_REG_X86_DX) | \
+ (1ULL << PERF_REG_X86_DI) | \
+ (1ULL << PERF_REG_X86_SI) | \
+ (1ULL << PERF_REG_X86_SP) | \
+ (1ULL << PERF_REG_X86_BP) | \
+ (1ULL << PERF_REG_X86_IP) | \
+ (1ULL << PERF_REG_X86_FLAGS) | \
+ (1ULL << PERF_REG_X86_R8) | \
+ (1ULL << PERF_REG_X86_R9) | \
+ (1ULL << PERF_REG_X86_R10) | \
+ (1ULL << PERF_REG_X86_R11) | \
+ (1ULL << PERF_REG_X86_R12) | \
+ (1ULL << PERF_REG_X86_R13) | \
+ (1ULL << PERF_REG_X86_R14) | \
+ (1ULL << PERF_REG_X86_R15))
/*
* Per register state.
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
-#define ADDR BITOP_ADDR(addr)
+#define ADDR RLONG_ADDR(addr)
/*
* We do the locked ops that don't return the old value as
* a mask operation on a byte.
*/
#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
/**
: "memory");
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
- : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
: "iq" ((u8)~CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1"
CC_SET(s)
- : CC_OUT(s) (negative), ADDR
+ : CC_OUT(s) (negative), WBYTE_ADDR(addr)
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
* __clear_bit() is non-atomic and implies release semantics before the memory
* operation. It can be used for an unlock if no other CPUs can concurrently
* modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
*/
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
- barrier();
__clear_bit(nr, addr);
}
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
: "iq" ((u8)CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr) : "memory");
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
- : "m" (*(unsigned long *)addr), "Ir" (nr));
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
}
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
- int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
+ int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
+ const char *smstate);
+ void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
};
}
#define KVM_PERMILLE_MMU_PAGES 20
-#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
};
struct kvm_arch {
- unsigned int n_used_mmu_pages;
- unsigned int n_requested_mmu_pages;
- unsigned int n_max_mmu_pages;
+ unsigned long n_used_mmu_pages;
+ unsigned long n_requested_mmu_pages;
+ unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
+ int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
bool pdptrs_changed(struct kvm_vcpu *vcpu);
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
#endif /* _ASM_X86_KVM_HOST_H */
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, ®s->bx + i, n * sizeof(args[0]));
+ memcpy(args, ®s->bx, 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->r10;
- case 4:
- if (!n--) break;
- *args++ = regs->r8;
- case 5:
- if (!n--) break;
- *args++ = regs->r9;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ *args++ = regs->di;
+ *args++ = regs->si;
+ *args++ = regs->dx;
+ *args++ = regs->r10;
+ *args++ = regs->r8;
+ *args = regs->r9;
+ }
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- regs->bx = *args++;
- case 1:
- if (!n--) break;
- regs->cx = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->si = *args++;
- case 4:
- if (!n--) break;
- regs->di = *args++;
- case 5:
- if (!n--) break;
- regs->bp = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ regs->bx = *args++;
+ regs->cx = *args++;
+ regs->dx = *args++;
+ regs->si = *args++;
+ regs->di = *args++;
+ regs->bp = *args;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- regs->di = *args++;
- case 1:
- if (!n--) break;
- regs->si = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->r10 = *args++;
- case 4:
- if (!n--) break;
- regs->r8 = *args++;
- case 5:
- if (!n--) break;
- regs->r9 = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ regs->di = *args++;
+ regs->si = *args++;
+ regs->dx = *args++;
+ regs->r10 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
+ }
}
static inline int syscall_get_arch(void)
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+ if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
+ return -EINVAL;
+
asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
: [thunk_target] "a" (&hypercall_page[call])
+++ /dev/null
-#include <asm-generic/sockios.h>
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
+#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] __initdata = {
+} ssb_mitigation_options[] __initconst = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
return;
- pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+ pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
enum rdt_param {
Opt_cdp,
Opt_cdpl2,
- Opt_mba_mpbs,
+ Opt_mba_mbps,
nr__rdt_params
};
static const struct fs_parameter_spec rdt_param_specs[] = {
fsparam_flag("cdp", Opt_cdp),
fsparam_flag("cdpl2", Opt_cdpl2),
- fsparam_flag("mba_mpbs", Opt_mba_mpbs),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
{}
};
case Opt_cdpl2:
ctx->enable_cdpl2 = true;
return 0;
- case Opt_mba_mpbs:
+ case Opt_mba_mbps:
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -EINVAL;
ctx->enable_mba_mbps = true;
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
}
- rdtgrp->mode = RDT_MODE_SHAREABLE;
}
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
return 0;
}
unsigned long *sara = stack_addr(regs);
ri->ret_addr = (kprobe_opcode_t *) *sara;
+ ri->fp = sara;
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+static struct kprobe kretprobe_kprobe = {
+ .addr = (void *)kretprobe_trampoline,
+};
+
/*
* Called from kretprobe_trampoline
*/
static __used void *trampoline_handler(struct pt_regs *regs)
{
+ struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
kprobe_opcode_t *correct_ret_addr = NULL;
+ void *frame_pointer;
+ bool skipped = false;
+
+ preempt_disable();
+
+ /*
+ * Set a dummy kprobe to avoid kretprobe recursion.
+ * Since a kretprobe never runs in a kprobe handler, no kprobe can
+ * be running at this point.
+ */
+ kcb = get_kprobe_ctlblk();
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
+ /* On x86-64, we use pt_regs->sp for return address holder. */
+ frame_pointer = &regs->sp;
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
regs->gs = 0;
+ /* On x86-32, we use pt_regs->flags for return address holder. */
+ frame_pointer = &regs->flags;
#endif
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ /*
+ * Return probes must be pushed onto this hash list in the correct
+ * order (same as return order) so that they can be popped
+ * correctly. However, if we find one pushed in the incorrect
+ * order, it means we have found a function which should not be
+ * probed, because the wrong-order entry was pushed on the
+ * path of processing another kretprobe itself.
+ */
+ if (ri->fp != frame_pointer) {
+ if (!skipped)
+ pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
+ skipped = true;
+ continue;
+ }
orig_ret_address = (unsigned long)ri->ret_addr;
+ if (skipped)
+ pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
+ ri->rp->kp.addr);
if (orig_ret_address != trampoline_address)
/*
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ if (ri->fp != frame_pointer)
+ continue;
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
}
recycle_rp_inst(ri, &empty_rp);
kretprobe_hash_unlock(current, &flags);
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
+ lockdep_assert_irqs_disabled();
+
/*
* If TIF_SSBD is different, select the proper mitigation
* method. Note that if SSBD mitigation is disabled or permanentely
void speculation_ctrl_update(unsigned long tif)
{
+ unsigned long flags;
+
/* Forced update. Make sure all relevant TIF flags are different */
- preempt_disable();
+ local_irq_save(flags);
__speculation_ctrl_update(~tif, tif);
- preempt_enable();
+ local_irq_restore(flags);
}
/* Called from seccomp/prctl update */
return 0;
}
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+ reboot_type = BOOT_EFI;
+ pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+ }
+ return 0;
+}
+
void __noreturn machine_real_restart(unsigned int type)
{
local_irq_disable();
DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
+ { /* Handle reboot issue on Acer TravelMate X514-51T */
+ .callback = set_efi_reboot,
+ .ident = "Acer TravelMate X514-51T",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+ },
+ },
/* Apple */
{ /* Handle problems with rebooting on Apple MacBook5 */
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
- *(.bss)
+ *(BSS_MAIN)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
+#ifdef CONFIG_X86_64
u32 eax, ebx, ecx, edx;
eax = 0x80000001;
ecx = 0;
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
return edx & bit(X86_FEATURE_LM);
+#else
+ return false;
+#endif
}
-#define GET_SMSTATE(type, smbase, offset) \
- ({ \
- type __val; \
- int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
- sizeof(__val)); \
- if (r != X86EMUL_CONTINUE) \
- return X86EMUL_UNHANDLEABLE; \
- __val; \
- })
-
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
desc->type = (flags >> 8) & 15;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
u16 selector;
- selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+ selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
return X86EMUL_CONTINUE;
}
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
offset = 0x7e00 + n * 16;
- selector = GET_SMSTATE(u16, smbase, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- base3 = GET_SMSTATE(u32, smbase, offset + 12);
+ selector = GET_SMSTATE(u16, smstate, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ base3 = GET_SMSTATE(u32, smstate, offset + 12);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
return X86EMUL_CONTINUE;
}
+#endif
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
u64 cr0, u64 cr3, u64 cr4)
return X86EMUL_CONTINUE;
}
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+ cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
+ cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
- val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ val = GET_SMSTATE(u32, smstate, 0x7fcc);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ val = GET_SMSTATE(u32, smstate, 0x7fc8);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- selector = GET_SMSTATE(u32, smbase, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
- selector = GET_SMSTATE(u32, smbase, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
ctxt->ops->set_idt(ctxt, &dt);
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smbase, i);
+ int r = rsm_load_seg_32(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+ cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
- ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u32, smbase, 0x7f68);
+ val = GET_SMSTATE(u32, smstate, 0x7f68);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7f60);
+ val = GET_SMSTATE(u32, smstate, 0x7f60);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
- cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
- val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
+ cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ val = GET_SMSTATE(u64, smstate, 0x7ed0);
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
- selector = GET_SMSTATE(u32, smbase, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
- base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smbase, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
- base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
return r;
for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
return X86EMUL_CONTINUE;
}
+#endif
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
+ char buf[512];
u64 smbase;
int ret;
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
+ smbase = ctxt->ops->get_smbase(ctxt);
+
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
+
/*
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
*/
- cr4 = ctxt->ops->get_cr(ctxt, 4);
if (emulator_has_longmode(ctxt)) {
struct desc_struct cs_desc;
/* Zero CR4.PCIDE before CR0.PG. */
- if (cr4 & X86_CR4_PCIDE) {
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PCIDE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
- cr4 &= ~X86_CR4_PCIDE;
- }
/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
- /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
- if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
- /* And finally go back to 32-bit mode. */
- efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ if (emulator_has_longmode(ctxt)) {
+ /* Clear CR4.PAE before clearing EFER.LME. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
- smbase = ctxt->ops->get_smbase(ctxt);
+ /* And finally go back to 32-bit mode. */
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ }
/*
* Give pre_leave_smm() a chance to make ISA-specific changes to the
* vCPU state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (ctxt->ops->pre_leave_smm(ctxt, smbase))
+ if (ctxt->ops->pre_leave_smm(ctxt, buf))
return X86EMUL_UNHANDLEABLE;
+#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, smbase + 0x8000);
+ ret = rsm_load_state_64(ctxt, buf);
else
- ret = rsm_load_state_32(ctxt, smbase + 0x8000);
+#endif
+ ret = rsm_load_state_32(ctxt, buf);
if (ret != X86EMUL_CONTINUE) {
/* FIXME: should triple fault */
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
+ ctxt->ops->post_leave_smm(ctxt);
- ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
- ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
if (offset <= max_apic_id) {
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+ offset = array_index_nospec(offset, map->max_apic_id + 1);
*cluster = &map->phys_map[offset];
*mask = dest_id & (0xffff >> (16 - cluster_size));
} else {
if (irq->dest_id > map->max_apic_id) {
*bitmap = 0;
} else {
- *dst = &map->phys_map[irq->dest_id];
+ u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
+ *dst = &map->phys_map[dest_id];
*bitmap = 1;
}
return true;
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
struct list_head *invalid_list,
bool remote_flush)
{
- if (!remote_flush && !list_empty(invalid_list))
+ if (!remote_flush && list_empty(invalid_list))
return false;
if (!list_empty(invalid_list))
* Changing the number of mmu pages allocated to the vm
* Note: if goal_nr_mmu_pages is too small, you will get dead lock
*/
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
/*
* Calculate mmu pages needed for kvm.
*/
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
{
- unsigned int nr_mmu_pages;
- unsigned int nr_pages = 0;
+ unsigned long nr_mmu_pages;
+ unsigned long nr_pages = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i;
}
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
- nr_mmu_pages = max(nr_mmu_pages,
- (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+ nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
-static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
u64 ctr_val;
+ if (!pmu->version)
+ return 1;
+
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
};
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
static int db_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
if (svm->nmi_singlestep) {
disable_nmi_singlestep(svm);
+ /* Make sure we check for pending NMIs upon entry */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
if (svm->vcpu.guest_debug &
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
- * Update ICR high and low, then emulate sending IPI,
- * which is handled when writing APIC_ICR.
+ * At this point, we expect that the AVIC HW has already
+ * set the appropriate IRR bits on the valid target
+ * vcpus. So, we just need to kick the appropriate vcpu.
*/
- kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
- kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ bool m = kvm_apic_match_dest(vcpu, apic,
+ icrl & KVM_APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & KVM_APIC_DEST_MASK);
+
+ if (m && !avic_vcpu_is_running(vcpu))
+ kvm_vcpu_wake_up(vcpu);
+ }
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
if (entry)
- WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
+ clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
+ kvm_load_guest_xcr0(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
+ kvm_put_guest_xcr0(vcpu);
stgi();
/* Any pending NMI will happen here */
return 0;
}
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
struct page *page;
- struct {
- u64 guest;
- u64 vmcb;
- } svm_state_save;
- int ret;
+ u64 guest;
+ u64 vmcb;
- ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
- sizeof(svm_state_save));
- if (ret)
- return ret;
+ guest = GET_SMSTATE(u64, smstate, 0x7ed8);
+ vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (svm_state_save.guest) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
- nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
- if (nested_vmcb)
- enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
- else
- ret = 1;
- vcpu->arch.hflags |= HF_SMM_MASK;
+ if (guest) {
+ nested_vmcb = nested_svm_map(svm, vmcb, &page);
+ if (!nested_vmcb)
+ return 1;
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb, page);
}
- return ret;
+ return 0;
}
static int enable_smi_window(struct kvm_vcpu *vcpu)
return ret;
}
-static int get_num_contig_pages(int idx, struct page **inpages,
- unsigned long npages)
+static unsigned long get_num_contig_pages(unsigned long idx,
+ struct page **inpages, unsigned long npages)
{
unsigned long paddr, next_paddr;
- int i = idx + 1, pages = 1;
+ unsigned long i = idx + 1, pages = 1;
/* find the number of contiguous pages starting from idx */
paddr = __sme_page_pa(inpages[idx]);
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data *data;
struct page **inpages;
- int i, ret, pages;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
- int ret, size;
+ unsigned int size;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
return -EFAULT;
+ if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
+ return -EINVAL;
+ if (!debug.dst_uaddr)
+ return -EINVAL;
+
vaddr = debug.src_uaddr;
size = debug.len;
vaddr_end = vaddr + size;
dst_vaddr,
len, &argp->error);
- sev_unpin_memory(kvm, src_p, 1);
- sev_unpin_memory(kvm, dst_p, 1);
+ sev_unpin_memory(kvm, src_p, n);
+ sev_unpin_memory(kvm, dst_p, n);
if (ret)
goto err;
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
- __field( __u8, tm )
+ __field( __u16, tm )
__field( __u8, vec )
),
}
}
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap[word] = ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+}
+
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
return false;
msr_bitmap_l1 = (unsigned long *)kmap(page);
- if (nested_cpu_has_apic_reg_virt(vmcs12)) {
- /*
- * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
- * just lets the processor take the value from the virtual-APIC page;
- * take those 256 bits directly from the L1 bitmap.
- */
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = msr_bitmap_l1[word];
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- } else {
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = ~0;
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- }
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_TASKPRI),
- MSR_TYPE_W);
+ /*
+ * To keep the control flow simple, pay eight 8-byte writes (sixteen
+ * 4-byte writes on 32-bit systems) up front to enable intercepts for
+ * the x2APIC MSR range and selectively disable them below.
+ */
+ enable_x2apic_msr_intercepts(msr_bitmap_l0);
+
+ if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800
+ * and 0x8ff, it just lets the processor take the value
+ * from the virtual-APIC page; take those 256 bits
+ * directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ }
+ }
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_EOI),
- MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_SELF_IPI),
- MSR_TYPE_W);
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (nested_cpu_has_vid(vmcs12)) {
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
+ }
}
if (spec_ctrl)
/*
* If translation failed, VM entry will fail because
* prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
- * Failing the vm entry is _not_ what the processor
- * does but it's basically the only possibility we
- * have. We could still enter the guest if CR8 load
- * exits are enabled, CR8 store exits are enabled, and
- * virtualize APIC access is disabled; in this case
- * the processor would never use the TPR shadow and we
- * could simply clear the bit from the execution
- * control. But such a configuration is useless, so
- * let's keep the code simple.
*/
if (!is_error_page(page)) {
vmx->nested.virtual_apic_page = page;
hpa = page_to_phys(vmx->nested.virtual_apic_page);
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
+ } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
+ nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ /*
+ * The processor will never use the TPR shadow, simply
+ * clear the bit from the execution control. Such a
+ * configuration is useless, but it happens in tests.
+ * For any other configuration, failing the vm entry is
+ * _not_ what the processor does but it's basically the
+ * only possibility we have.
+ */
+ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_TPR_SHADOW);
+ } else {
+ printk("bad virtual-APIC page address\n");
+ dump_vmcs();
}
}
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+ /*
+ * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
+ * points to shadow pages! Fortunately we only get here after a WARN_ON
+ * if EPT is disabled, so a VMabort is perfectly fine.
+ */
+ if (enable_ept) {
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ } else {
+ nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
+ }
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
{
int i;
+ /*
+ * Without EPT it is not possible to restore L1's CR3 and PDPTR on
+ * VMfail, because they are not available in vmcs01. Just always
+ * use hardware checks.
+ */
+ if (!enable_ept)
+ nested_early_check = 1;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
-static void dump_vmcs(void)
+void dump_vmcs(void)
{
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
+ kvm_load_guest_xcr0(vcpu);
+
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)
__write_pkru(vmx->host_pkru);
}
+ kvm_put_guest_xcr0(vcpu);
+
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
}
}
+static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry;
+ union cpuid10_eax eax;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+ if (!entry)
+ return false;
+
+ eax.full = entry->eax;
+ return (eax.split.version_id > 0);
+}
+
+static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
+
+ if (pmu_enabled)
+ vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
+ else
+ vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
+}
+
static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
+ nested_vmx_procbased_ctls_update(vcpu);
}
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
return 0;
}
-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
}
if (vmx->nested.smm.guest_mode) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
}
+void dump_vmcs(void);
+
#endif /* __KVM_X86_VMX_H */
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
vcpu->guest_xcr0_loaded = 1;
}
}
+EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
vcpu->guest_xcr0_loaded = 0;
}
}
+EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
- kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+ kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
break;
default:
break;
memset(&events->reserved, 0, sizeof(events->reserved));
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu);
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- u32 hflags = vcpu->arch.hflags;
- if (events->smi.smm)
- hflags |= HF_SMM_MASK;
- else
- hflags &= ~HF_SMM_MASK;
- kvm_set_hflags(vcpu, hflags);
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ if (events->smi.smm)
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ else
+ vcpu->arch.hflags &= ~HF_SMM_MASK;
+ kvm_smm_changed(vcpu);
+ }
vcpu->arch.smi_pending = events->smi.pending;
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
- u32 kvm_nr_mmu_pages)
+ unsigned long kvm_nr_mmu_pages)
{
if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
return 0;
}
-static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
return kvm->arch.n_max_mmu_pages;
}
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+ emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+}
+
+static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
+{
+ return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
}
-static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
{
- return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
+ kvm_smm_changed(emul_to_vcpu(ctxt));
}
static const struct x86_emulate_ops emulate_ops = {
.get_hflags = emulator_get_hflags,
.set_hflags = emulator_set_hflags,
.pre_leave_smm = emulator_pre_leave_smm,
+ .post_leave_smm = emulator_post_leave_smm,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
kvm_mmu_reset_context(vcpu);
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
-{
- unsigned changed = vcpu->arch.hflags ^ emul_flags;
-
- vcpu->arch.hflags = emul_flags;
-
- if (changed & HF_SMM_MASK)
- kvm_smm_changed(vcpu);
-}
-
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
+#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
{
-#ifdef CONFIG_X86_64
struct desc_ptr dt;
struct kvm_segment seg;
unsigned long val;
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
-#else
- WARN_ON_ONCE(1);
-#endif
}
+#endif
static void enter_smm(struct kvm_vcpu *vcpu)
{
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, buf);
else
+#endif
enter_smm_save_state_32(vcpu, buf);
/*
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
kvm_x86_ops->set_efer(vcpu, 0);
+#endif
kvm_update_cpuid(vcpu);
kvm_mmu_reset_context(vcpu);
goto cancel_injection;
}
- kvm_load_guest_xcr0(vcpu);
-
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_put_guest_xcr0(vcpu);
-
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
kvm_after_interrupt(vcpu);
__this_cpu_write(current_vcpu, NULL);
}
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
#endif
#endif
/* Account the WX pages */
st->wx_pages += npages;
- WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+ WARN_ONCE(__supported_pte_mask & _PAGE_NX,
+ "x86/mm: Found insecure W+X mapping at address %pS\n",
(void *)st->start_address);
}
pte = early_ioremap_pte(addr);
/* Sanitize 'prot' against any unsupported bits: */
- pgprot_val(flags) &= __default_kernel_pte_mask;
+ pgprot_val(flags) &= __supported_pte_mask;
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
if (!kaslr_memory_enabled())
return;
- kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
/*
{
int cpu;
- struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
+ struct flush_tlb_info info = {
.mm = mm,
.stride_shift = stride_shift,
.freed_tables = freed_tables,
/* Clearing a0 terminates the backtrace. */
#define start_thread(regs, new_pc, new_sp) \
- memset(regs, 0, sizeof(*regs)); \
- regs->pc = new_pc; \
- regs->ps = USER_PS_VALUE; \
- regs->areg[1] = new_sp; \
- regs->areg[0] = 0; \
- regs->wmask = 1; \
- regs->depc = 0; \
- regs->windowbase = 0; \
- regs->windowstart = 1;
+ do { \
+ memset((regs), 0, sizeof(*(regs))); \
+ (regs)->pc = (new_pc); \
+ (regs)->ps = USER_PS_VALUE; \
+ (regs)->areg[1] = (new_sp); \
+ (regs)->areg[0] = 0; \
+ (regs)->wmask = 1; \
+ (regs)->depc = 0; \
+ (regs)->windowbase = 0; \
+ (regs)->windowstart = 1; \
+ (regs)->syscall = NO_SYSCALL; \
+ } while (0)
/* Forward declaration */
struct task_struct;
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
+ unsigned int i;
- if (n == 0)
- return;
-
- WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS);
-
- for (j = 0; j < n; ++j) {
- if (i + j < SYSCALL_MAX_ARGS)
- args[j] = regs->areg[reg[i + j]];
- else
- args[j] = 0;
- }
+ for (i = 0; i < 6; ++i)
+ args[i] = regs->areg[reg[i]];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
-
- if (n == 0)
- return;
-
- if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) {
- if (i < SYSCALL_MAX_ARGS)
- n = SYSCALL_MAX_ARGS - i;
- else
- return;
- }
+ unsigned int i;
- for (j = 0; j < n; ++j)
- regs->areg[reg[i + j]] = args[j];
+ for (i = 0; i < 6; ++i)
+ regs->areg[reg[i]] = args[i];
}
asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* _XTENSA_SOCKIOS_H */
l32i a7, a2, PT_SYSCALL
1:
+ s32i a7, a1, 4
+
/* syscall = sys_call_table[syscall_nr] */
movi a4, sys_call_table
retw
1:
+ l32i a4, a1, 4
+ l32i a3, a2, PT_SYSCALL
+ s32i a4, a2, PT_SYSCALL
mov a6, a2
call4 do_syscall_trace_leave
+ s32i a3, a2, PT_SYSCALL
retw
ENDPROC(system_call)
return 1;
}
+/*
+ * level == 0 is for the return address from the caller of this function,
+ * not from this function itself.
+ */
unsigned long return_address(unsigned level)
{
struct return_addr_data r = {
- .skip = level + 1,
+ .skip = level,
};
walk_stackframe(stack_pointer(NULL), return_address_cb, &r);
return r.addr;
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE);
if (!pte)
- panic("%s: Failed to allocate %zu bytes align=%lx\n",
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, n_pages * sizeof(pte_t), PAGE_SIZE);
for (i = 0; i < n_pages; ++i)
* at least two nodes.
*/
return !(varied_queue_weights || multiple_classes_busy
-#ifdef BFQ_GROUP_IOSCHED_ENABLED
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
|| bfqd->num_groups_with_pending_reqs > 0
#endif
);
bfq_remove_request(q, rq);
}
-static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
/*
* If this bfqq is shared between multiple processes, check
/*
* All in-service entities must have been properly deactivated
* or requeued before executing the next function, which
- * resets all in-service entites as no more in service.
+ * resets all in-service entities as no more in service. This
+ * may cause bfqq to be freed. If this happens, the next
+ * function returns true.
*/
- __bfq_bfqd_reset_in_service(bfqd);
+ return __bfq_bfqd_reset_in_service(bfqd);
}
/**
bool slow;
unsigned long delta = 0;
struct bfq_entity *entity = &bfqq->entity;
- int ref;
/*
* Check whether the process is slow (see bfq_bfqq_is_slow).
* reason.
*/
__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
- ref = bfqq->ref;
- __bfq_bfqq_expire(bfqd, bfqq);
-
- if (ref == 1) /* bfqq is gone, no more actions on it */
+ if (__bfq_bfqq_expire(bfqd, bfqq))
+ /* bfqq is gone, no more actions on it */
return;
bfqq->injected_service = 0;
return min_shallow;
}
-static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
{
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;
min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+}
+
+static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+{
+ bfq_depth_updated(hctx);
return 0;
}
.requests_merged = bfq_requests_merged,
.request_merged = bfq_request_merged,
.has_work = bfq_has_work,
+ .depth_updated = bfq_depth_updated,
.init_hctx = bfq_init_hctx,
.init_sched = bfq_init_queue,
.exit_sched = bfq_exit_queue,
bool ins_into_idle_tree);
bool next_queue_may_preempt(struct bfq_data *bfqd);
struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool ins_into_idle_tree, bool expiration);
void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
entity->on_st = true;
}
-#ifdef BFQ_GROUP_IOSCHED_ENABLED
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */
struct bfq_group *bfqg =
container_of(entity, struct bfq_group, entity);
return bfqq;
}
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+/* returns true if the in-service queue gets freed */
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
{
struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
* service tree either, then release the service reference to
* the queue it represents (taken with bfq_get_entity).
*/
- if (!in_serv_entity->on_st)
+ if (!in_serv_entity->on_st) {
+ /*
+ * If no process is referencing in_serv_bfqq any
+ * longer, then the service reference may be the only
+ * reference to the queue. If this is the case, then
+ * bfqq gets freed here.
+ */
+ int ref = in_serv_bfqq->ref;
bfq_put_queue(in_serv_bfqq);
+ if (ref == 1)
+ return true;
+ }
+
+ return false;
}
void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
}
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
+ if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+ if (!map_data)
+ __free_page(page);
break;
+ }
len -= bytes;
offset = 0;
*/
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
- blk_qc_t unused;
-
if (blk_cloned_rq_check_limits(q, rq))
return BLK_STS_IOERR;
* bypass a potential scheduler on the bottom device for
* insert.
*/
- return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true);
+ return blk_mq_request_issue_directly(rq, true);
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async)
+ if (!hctx->dispatch_busy && !e && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
- else
- blk_mq_insert_requests(hctx, ctx, list);
+ if (list_empty(list))
+ return;
+ }
+ blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
}
EXPORT_SYMBOL(blk_mq_complete_request);
+void blk_mq_complete_request_sync(struct request *rq)
+{
+ WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+ rq->q->mq_ops->complete(rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
+
int blk_mq_request_started(struct request *rq)
{
return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
unsigned int depth;
list_splice_init(&plug->mq_list, &list);
- plug->rq_count = 0;
if (plug->rq_count > 2 && plug->multiple_queues)
list_sort(NULL, &list, plug_rq_cmp);
+ plug->rq_count = 0;
+
this_q = NULL;
this_hctx = NULL;
this_ctx = NULL;
return ret;
}
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
blk_qc_t *cookie,
- bool bypass, bool last)
+ bool bypass_insert, bool last)
{
struct request_queue *q = rq->q;
bool run_queue = true;
- blk_status_t ret = BLK_STS_RESOURCE;
- int srcu_idx;
- bool force = false;
- hctx_lock(hctx, &srcu_idx);
/*
- * hctx_lock is needed before checking quiesced flag.
+ * RCU or SRCU read lock is needed before checking quiesced flag.
*
- * When queue is stopped or quiesced, ignore 'bypass', insert
- * and return BLK_STS_OK to caller, and avoid driver to try to
- * dispatch again.
+ * When the queue is stopped or quiesced, ignore 'bypass_insert' from
+ * blk_mq_request_issue_directly(), insert the request and return
+ * BLK_STS_OK to the caller so the driver does not try to dispatch again.
*/
- if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) {
+ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
- bypass = false;
- goto out_unlock;
+ bypass_insert = false;
+ goto insert;
}
- if (unlikely(q->elevator && !bypass))
- goto out_unlock;
+ if (q->elevator && !bypass_insert)
+ goto insert;
if (!blk_mq_get_dispatch_budget(hctx))
- goto out_unlock;
+ goto insert;
if (!blk_mq_get_driver_tag(rq)) {
blk_mq_put_dispatch_budget(hctx);
- goto out_unlock;
+ goto insert;
}
- /*
- * Always add a request that has been through
- *.queue_rq() to the hardware dispatch list.
- */
- force = true;
- ret = __blk_mq_issue_directly(hctx, rq, cookie, last);
-out_unlock:
+ return __blk_mq_issue_directly(hctx, rq, cookie, last);
+insert:
+ if (bypass_insert)
+ return BLK_STS_RESOURCE;
+
+ blk_mq_request_bypass_insert(rq, run_queue);
+ return BLK_STS_OK;
+}
+
+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, blk_qc_t *cookie)
+{
+ blk_status_t ret;
+ int srcu_idx;
+
+ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+
+ hctx_lock(hctx, &srcu_idx);
+
+ ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+ blk_mq_request_bypass_insert(rq, true);
+ else if (ret != BLK_STS_OK)
+ blk_mq_end_request(rq, ret);
+
+ hctx_unlock(hctx, srcu_idx);
+}
+
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
+{
+ blk_status_t ret;
+ int srcu_idx;
+ blk_qc_t unused_cookie;
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ hctx_lock(hctx, &srcu_idx);
+ ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
hctx_unlock(hctx, srcu_idx);
- switch (ret) {
- case BLK_STS_OK:
- break;
- case BLK_STS_DEV_RESOURCE:
- case BLK_STS_RESOURCE:
- if (force) {
- blk_mq_request_bypass_insert(rq, run_queue);
- /*
- * We have to return BLK_STS_OK for the DM
- * to avoid livelock. Otherwise, we return
- * the real result to indicate whether the
- * request is direct-issued successfully.
- */
- ret = bypass ? BLK_STS_OK : ret;
- } else if (!bypass) {
- blk_mq_sched_insert_request(rq, false,
- run_queue, false);
- }
- break;
- default:
- if (!bypass)
- blk_mq_end_request(rq, ret);
- break;
- }
return ret;
}
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list)
{
- blk_qc_t unused;
- blk_status_t ret = BLK_STS_OK;
-
while (!list_empty(list)) {
+ blk_status_t ret;
struct request *rq = list_first_entry(list, struct request,
queuelist);
list_del_init(&rq->queuelist);
- if (ret == BLK_STS_OK)
- ret = blk_mq_try_issue_directly(hctx, rq, &unused,
- false,
+ ret = blk_mq_request_issue_directly(rq, list_empty(list));
+ if (ret != BLK_STS_OK) {
+ if (ret == BLK_STS_RESOURCE ||
+ ret == BLK_STS_DEV_RESOURCE) {
+ blk_mq_request_bypass_insert(rq,
list_empty(list));
- else
- blk_mq_sched_insert_request(rq, false, true, false);
+ break;
+ }
+ blk_mq_end_request(rq, ret);
+ }
}
/*
* the driver there was more coming, but that turned out to
* be a lie.
*/
- if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs)
+ if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
hctx->queue->mq_ops->commit_rqs(hctx);
}
plug->rq_count--;
}
blk_add_rq_to_plug(plug, rq);
+ trace_block_plug(q);
blk_mq_put_ctx(data.ctx);
if (same_queue_rq) {
data.hctx = same_queue_rq->mq_hctx;
+ trace_block_unplug(q, 1, true);
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
- &cookie, false, true);
+ &cookie);
}
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
!data.hctx->dispatch_busy)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true);
+ blk_mq_try_issue_directly(data.hctx, rq, &cookie);
} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
return 0;
free_fq:
- kfree(hctx->fq);
+ blk_free_flush_queue(hctx->fq);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
}
if (ret)
break;
+ if (q->elevator && q->elevator->type->ops.depth_updated)
+ q->elevator->type->ops.depth_updated(hctx);
}
if (!ret)
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
- struct request *rq,
- blk_qc_t *cookie,
- bool bypass, bool last);
+/*
+ * Used by blk_insert_cloned_request() and blk_mq_try_issue_list_directly()
+ * to issue a request directly.
+ */
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
.psize = 80,
.digest = "\x13\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00",
- },
+ }, { /* Regression test for overflow in AVX2 implementation */
+ .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff",
+ .psize = 300,
+ .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
+ "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
+ }
};
/* NHPoly1305 test vectors from https://github.com/google/adiantum */
}
}
+ if (obj_desc->common.type == ACPI_TYPE_REGION) {
+ acpi_ut_remove_address_range(obj_desc->region.space_id, node);
+ }
+
/* Clear the Node entry in all cases */
node->object = NULL;
goto out;
}
+ dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
+ cmd_name, out_obj->buffer.length);
+ print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
+ out_obj->buffer.pointer,
+ min_t(u32, 128, out_obj->buffer.length), true);
+
if (call_pkg) {
call_pkg->nd_fw_size = out_obj->buffer.length;
memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
return 0;
}
- dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
- cmd_name, out_obj->buffer.length);
- print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
- out_obj->buffer.pointer,
- min_t(u32, 128, out_obj->buffer.length), true);
-
for (i = 0, offset = 0; i < desc->out_num; i++) {
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
(u32 *) out_obj->buffer.pointer,
if (!test_bit(cmd, &nfit_mem->dsm_mask))
return -ENOTTY;
- if (old_data)
- memcpy(nd_cmd.cmd.old_pass, old_data->data,
- sizeof(nd_cmd.cmd.old_pass));
+ memcpy(nd_cmd.cmd.old_pass, old_data->data,
+ sizeof(nd_cmd.cmd.old_pass));
memcpy(nd_cmd.cmd.new_pass, new_data->data,
sizeof(nd_cmd.cmd.new_pass));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
/* flush all cache before we erase DIMM */
nvdimm_invalidate_cache();
- if (nkey)
- memcpy(nd_cmd.cmd.passphrase, nkey->data,
- sizeof(nd_cmd.cmd.passphrase));
+ memcpy(nd_cmd.cmd.passphrase, nkey->data,
+ sizeof(nd_cmd.cmd.passphrase));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
if (rc < 0)
return rc;
}
if (status & ISR_TBRQ_W) {
- fs_dprintk (FS_DEBUG_IRQ, "Data tramsitted!\n");
+ fs_dprintk (FS_DEBUG_IRQ, "Data transmitted!\n");
process_txdone_queue (dev, &dev->tx_relq);
}
case 0x6:
{
ia_cmds.status = 0;
- printk("skb = 0x%lx\n", (long)skb_peek(&iadev->tx_backlog));
- printk("rtn_q: 0x%lx\n",(long)ia_deque_rtn_q(&iadev->tx_return_q));
+ printk("skb = 0x%p\n", skb_peek(&iadev->tx_backlog));
+ printk("rtn_q: 0x%p\n",ia_deque_rtn_q(&iadev->tx_return_q));
}
break;
case 0x8:
ret = lock_device_hotplug_sysfs();
if (ret)
- goto out;
+ return ret;
nid = memory_add_physaddr_to_nid(phys_addr);
ret = __add_memory(nid, phys_addr,
return -EINVAL;
}
+ if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
+ pr_err("null_blk: invalid home_node value\n");
+ g_home_node = NUMA_NO_NODE;
+ }
+
if (g_queue_mode == NULL_Q_RQ) {
pr_err("null_blk: legacy IO path no longer available\n");
return -EINVAL;
disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
1, BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disk->queue)) {
+ put_disk(disk);
disk->queue = NULL;
continue;
}
printk("%s: No CD-ROM drive found\n", name);
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
blk_cleanup_queue(cd->disk->queue);
cd->disk->queue = NULL;
blk_mq_free_tag_set(&cd->tag_set);
pcd_probe_capabilities();
if (register_blkdev(major, name)) {
- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
+
+ blk_cleanup_queue(cd->disk->queue);
+ blk_mq_free_tag_set(&cd->tag_set);
put_disk(cd->disk);
+ }
return -EBUSY;
}
int unit;
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
+
if (cd->present) {
del_gendisk(cd->disk);
pi_release(cd->pi);
printk("%s: No ATAPI disk detected\n", name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
blk_cleanup_queue(pf->disk->queue);
pf->disk->queue = NULL;
blk_mq_free_tag_set(&pf->tag_set);
pf_busy = 0;
if (register_blkdev(major, name)) {
- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
+ blk_cleanup_queue(pf->disk->queue);
+ blk_mq_free_tag_set(&pf->tag_set);
put_disk(pf->disk);
+ }
return -EBUSY;
}
int unit;
unregister_blkdev(major, name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
+
if (pf->present)
del_gendisk(pf->disk);
if (err)
num_vqs = 1;
+ num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);
+
vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
if (!vblk->vqs)
return -ENOMEM;
return 0;
err_read:
+ /* prevent double queue cleanup */
+ ace->gd->queue = NULL;
put_disk(ace->gd);
err_alloc_disk:
blk_cleanup_queue(ace->queue);
The core driver to support Marvell Bluetooth devices.
This driver is required if you want to support
- Marvell Bluetooth devices, such as 8688/8787/8797/8887/8897/8977/8997.
+ Marvell Bluetooth devices, such as 8688/8787/8797/8887/8897/8977/8987/8997.
Say Y here to compile Marvell Bluetooth driver
into the kernel or say M to compile it as module.
The driver for Marvell Bluetooth chipsets with SDIO interface.
This driver is required if you want to use Marvell Bluetooth
- devices with SDIO interface. Currently SD8688/SD8787/SD8797/SD8887/SD8897/SD8977/SD8997
+ devices with SDIO interface. Currently SD8688/SD8787/SD8797/SD8887/SD8897/SD8977/SD8987/SD8997
chipsets are supported.
Say Y here to compile support for Marvell BT-over-SDIO driver
Say Y here to compile support for Texas Instrument's WiLink7 driver
into the kernel or say M to compile it as module (btwilink).
+config BT_MTKSDIO
+ tristate "MediaTek HCI SDIO driver"
+ depends on MMC
+ help
+ MediaTek Bluetooth HCI SDIO driver.
+ This driver is required if you want to use MediaTek Bluetooth
+ with SDIO interface.
+
+ Say Y here to compile support for MediaTek Bluetooth SDIO devices
+ into the kernel or say M to compile it as module (btmtksdio).
+
config BT_MTKUART
tristate "MediaTek HCI UART driver"
depends on SERIAL_DEV_BUS
obj-$(CONFIG_BT_MRVL) += btmrvl.o
obj-$(CONFIG_BT_MRVL_SDIO) += btmrvl_sdio.o
obj-$(CONFIG_BT_WILINK) += btwilink.o
+obj-$(CONFIG_BT_MTKSDIO) += btmtksdio.o
obj-$(CONFIG_BT_MTKUART) += btmtkuart.o
obj-$(CONFIG_BT_QCOMSMD) += btqcomsmd.o
obj-$(CONFIG_BT_BCM) += btbcm.o
#define BDADDR_BCM43430A0 (&(bdaddr_t) {{0xac, 0x1f, 0x12, 0xa0, 0x43, 0x43}})
#define BDADDR_BCM4324B3 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb3, 0x24, 0x43}})
#define BDADDR_BCM4330B1 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb1, 0x30, 0x43}})
+#define BDADDR_BCM43341B (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0x1b, 0x34, 0x43}})
int btbcm_check_bdaddr(struct hci_dev *hdev)
{
!bacmp(&bda->bdaddr, BDADDR_BCM20702A1) ||
!bacmp(&bda->bdaddr, BDADDR_BCM4324B3) ||
!bacmp(&bda->bdaddr, BDADDR_BCM4330B1) ||
- !bacmp(&bda->bdaddr, BDADDR_BCM43430A0)) {
+ !bacmp(&bda->bdaddr, BDADDR_BCM43430A0) ||
+ !bacmp(&bda->bdaddr, BDADDR_BCM43341B)) {
bt_dev_info(hdev, "BCM: Using default device address (%pMR)",
&bda->bdaddr);
set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
static const struct bcm_subver_table bcm_uart_subver_table[] = {
{ 0x4103, "BCM4330B1" }, /* 002.001.003 */
{ 0x410e, "BCM43341B0" }, /* 002.001.014 */
+ { 0x4204, "BCM2076B1" }, /* 002.002.004 */
{ 0x4406, "BCM4324B3" }, /* 002.004.006 */
{ 0x6109, "BCM4335C0" }, /* 003.001.009 */
{ 0x610c, "BCM4354" }, /* 003.001.012 */
.fw_dump_end = 0xf8,
};
+static const struct btmrvl_sdio_card_reg btmrvl_reg_8987 = {
+ .cfg = 0x00,
+ .host_int_mask = 0x08,
+ .host_intstatus = 0x0c,
+ .card_status = 0x5c,
+ .sq_read_base_addr_a0 = 0xf8,
+ .sq_read_base_addr_a1 = 0xf9,
+ .card_revision = 0xc8,
+ .card_fw_status0 = 0xe8,
+ .card_fw_status1 = 0xe9,
+ .card_rx_len = 0xea,
+ .card_rx_unit = 0xeb,
+ .io_port_0 = 0xe4,
+ .io_port_1 = 0xe5,
+ .io_port_2 = 0xe6,
+ .int_read_to_clear = true,
+ .host_int_rsr = 0x04,
+ .card_misc_cfg = 0xd8,
+ .fw_dump_ctrl = 0xf0,
+ .fw_dump_start = 0xf1,
+ .fw_dump_end = 0xf8,
+};
+
static const struct btmrvl_sdio_card_reg btmrvl_reg_8997 = {
.cfg = 0x00,
.host_int_mask = 0x08,
.supports_fw_dump = true,
};
+static const struct btmrvl_sdio_device btmrvl_sdio_sd8987 = {
+ .helper = NULL,
+ .firmware = "mrvl/sd8987_uapsta.bin",
+ .reg = &btmrvl_reg_8987,
+ .support_pscan_win_report = true,
+ .sd_blksz_fw_dl = 256,
+ .supports_fw_dump = true,
+};
+
static const struct btmrvl_sdio_device btmrvl_sdio_sd8997 = {
.helper = NULL,
.firmware = "mrvl/sd8997_uapsta.bin",
/* Marvell SD8977 Bluetooth device */
{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x9146),
.driver_data = (unsigned long)&btmrvl_sdio_sd8977 },
+ /* Marvell SD8987 Bluetooth device */
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x914A),
+ .driver_data = (unsigned long)&btmrvl_sdio_sd8987 },
/* Marvell SD8997 Bluetooth device */
{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x9142),
.driver_data = (unsigned long)&btmrvl_sdio_sd8997 },
MODULE_FIRMWARE("mrvl/sd8887_uapsta.bin");
MODULE_FIRMWARE("mrvl/sd8897_uapsta.bin");
MODULE_FIRMWARE("mrvl/sd8977_uapsta.bin");
+MODULE_FIRMWARE("mrvl/sd8987_uapsta.bin");
MODULE_FIRMWARE("mrvl/sd8997_uapsta.bin");
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 MediaTek Inc.
+
+/*
+ * Bluetooth support for MediaTek SDIO devices
+ *
+ * This file is written based on btsdio.c and btmtkuart.c.
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <asm/unaligned.h>
+#include <linux/atomic.h>
+#include <linux/firmware.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/skbuff.h>
+
+#include <linux/mmc/host.h>
+#include <linux/mmc/sdio_ids.h>
+#include <linux/mmc/sdio_func.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "h4_recv.h"
+
+#define VERSION "0.1"
+
+#define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin"
+#define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin"
+
+#define MTKBTSDIO_AUTOSUSPEND_DELAY 8000
+
+static bool enable_autosuspend;
+
+struct btmtksdio_data {
+ const char *fwname;
+};
+
+static const struct btmtksdio_data mt7663_data = {
+ .fwname = FIRMWARE_MT7663,
+};
+
+static const struct btmtksdio_data mt7668_data = {
+ .fwname = FIRMWARE_MT7668,
+};
+
+static const struct sdio_device_id btmtksdio_table[] = {
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7663),
+ .driver_data = (kernel_ulong_t)&mt7663_data },
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7668),
+ .driver_data = (kernel_ulong_t)&mt7668_data },
+ { } /* Terminating entry */
+};
+
+#define MTK_REG_CHLPCR 0x4 /* W1S */
+#define C_INT_EN_SET BIT(0)
+#define C_INT_EN_CLR BIT(1)
+#define C_FW_OWN_REQ_SET BIT(8) /* For write */
+#define C_COM_DRV_OWN BIT(8) /* For read */
+#define C_FW_OWN_REQ_CLR BIT(9)
+
+#define MTK_REG_CSDIOCSR 0x8
+#define SDIO_RE_INIT_EN BIT(0)
+#define SDIO_INT_CTL BIT(2)
+
+#define MTK_REG_CHCR 0xc
+#define C_INT_CLR_CTRL BIT(1)
+
+/* CHISR has the same bit field definitions as CHIER */
+#define MTK_REG_CHISR 0x10
+#define MTK_REG_CHIER 0x14
+#define FW_OWN_BACK_INT BIT(0)
+#define RX_DONE_INT BIT(1)
+#define TX_EMPTY BIT(2)
+#define TX_FIFO_OVERFLOW BIT(8)
+#define RX_PKT_LEN GENMASK(31, 16)
+
+#define MTK_REG_CTDR 0x18
+
+#define MTK_REG_CRDR 0x1c
+
+#define MTK_SDIO_BLOCK_SIZE 256
+
+#define BTMTKSDIO_TX_WAIT_VND_EVT 1
+
+enum {
+ MTK_WMT_PATCH_DWNLD = 0x1,
+ MTK_WMT_TEST = 0x2,
+ MTK_WMT_WAKEUP = 0x3,
+ MTK_WMT_HIF = 0x4,
+ MTK_WMT_FUNC_CTRL = 0x6,
+ MTK_WMT_RST = 0x7,
+ MTK_WMT_SEMAPHORE = 0x17,
+};
+
+enum {
+ BTMTK_WMT_INVALID,
+ BTMTK_WMT_PATCH_UNDONE,
+ BTMTK_WMT_PATCH_DONE,
+ BTMTK_WMT_ON_UNDONE,
+ BTMTK_WMT_ON_DONE,
+ BTMTK_WMT_ON_PROGRESS,
+};
+
+struct mtkbtsdio_hdr {
+ __le16 len;
+ __le16 reserved;
+ u8 bt_type;
+} __packed;
+
+struct mtk_wmt_hdr {
+ u8 dir;
+ u8 op;
+ __le16 dlen;
+ u8 flag;
+} __packed;
+
+struct mtk_hci_wmt_cmd {
+ struct mtk_wmt_hdr hdr;
+ u8 data[256];
+} __packed;
+
+struct btmtk_hci_wmt_evt {
+ struct hci_event_hdr hhdr;
+ struct mtk_wmt_hdr whdr;
+} __packed;
+
+struct btmtk_hci_wmt_evt_funcc {
+ struct btmtk_hci_wmt_evt hwhdr;
+ __be16 status;
+} __packed;
+
+struct btmtk_tci_sleep {
+ u8 mode;
+ __le16 duration;
+ __le16 host_duration;
+ u8 host_wakeup_pin;
+ u8 time_compensation;
+} __packed;
+
+/* Parameters describing one WMT command issued through mtk_hci_wmt_sync() */
+struct btmtk_hci_wmt_params {
+ u8 op; /* copied into the WMT header op field */
+ u8 flag; /* copied into the WMT header flag field */
+ u16 dlen; /* number of payload bytes to copy from data */
+ const void *data; /* command payload */
+ u32 *status; /* optional: receives a BTMTK_WMT_* result, may be NULL */
+};
+
+/* Per-device driver state, allocated in btmtksdio_probe() */
+struct btmtksdio_dev {
+ struct hci_dev *hdev;
+ struct sdio_func *func;
+ struct device *dev; /* &func->dev, used for runtime PM calls */
+
+ struct work_struct tx_work; /* runs btmtksdio_tx_work() */
+ unsigned long tx_state; /* bit flags, e.g. BTMTKSDIO_TX_WAIT_VND_EVT */
+ struct sk_buff_head txq; /* frames queued by btmtksdio_send_frame() */
+
+ struct sk_buff *evt_skb; /* clone of an event received while a WMT command waits */
+
+ const struct btmtksdio_data *data; /* per-chip data taken from the id table */
+};
+
+/* Send one WMT vendor command (opcode 0xfc6f) and wait for its vendor event.
+ *
+ * The WMT header is built from @wmt_params and sent with __hci_cmd_send().
+ * The caller then sleeps until btmtksdio_recv_event() clears
+ * BTMTKSDIO_TX_WAIT_VND_EVT or HCI_INIT_TIMEOUT expires. For
+ * MTK_WMT_SEMAPHORE and MTK_WMT_FUNC_CTRL the reply is decoded into a
+ * BTMTK_WMT_* code, which is stored in *wmt_params->status when that
+ * pointer is non-NULL.
+ *
+ * Returns 0 on success, -EINVAL if header plus payload exceed 255 bytes,
+ * -EINTR or -ETIMEDOUT if the wait fails, -EIO if the reply carries a
+ * different op, or the error from __hci_cmd_send().
+ */
+static int mtk_hci_wmt_sync(struct hci_dev *hdev,
+ struct btmtk_hci_wmt_params *wmt_params)
+{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+ struct btmtk_hci_wmt_evt_funcc *wmt_evt_funcc;
+ u32 hlen, status = BTMTK_WMT_INVALID;
+ struct btmtk_hci_wmt_evt *wmt_evt;
+ struct mtk_hci_wmt_cmd wc;
+ struct mtk_wmt_hdr *hdr;
+ int err;
+
+ hlen = sizeof(*hdr) + wmt_params->dlen;
+ if (hlen > 255)
+ return -EINVAL;
+
+ hdr = (struct mtk_wmt_hdr *)&wc;
+ hdr->dir = 1;
+ hdr->op = wmt_params->op;
+ /* NOTE(review): the + 1 presumably accounts for the flag byte - confirm */
+ hdr->dlen = cpu_to_le16(wmt_params->dlen + 1);
+ hdr->flag = wmt_params->flag;
+ memcpy(wc.data, wmt_params->data, wmt_params->dlen);
+
+ /* Must be set before sending so the receive path captures the reply */
+ set_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state);
+
+ err = __hci_cmd_send(hdev, 0xfc6f, hlen, &wc);
+ if (err < 0) {
+ clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state);
+ return err;
+ }
+
+ /* The vendor specific WMT commands are all answered by a vendor
+ * specific event and will not have the Command Status or Command
+ * Complete as with usual HCI command flow control.
+ *
+ * After sending the command, wait for BTMTKSDIO_TX_WAIT_VND_EVT
+ * state to be cleared. The driver specific event receive routine
+ * will clear that state and with that indicate completion of the
+ * WMT command.
+ */
+ err = wait_on_bit_timeout(&bdev->tx_state, BTMTKSDIO_TX_WAIT_VND_EVT,
+ TASK_INTERRUPTIBLE, HCI_INIT_TIMEOUT);
+ if (err == -EINTR) {
+ bt_dev_err(hdev, "Execution of wmt command interrupted");
+ clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state);
+ return err;
+ }
+
+ if (err) {
+ bt_dev_err(hdev, "Execution of wmt command timed out");
+ clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state);
+ return -ETIMEDOUT;
+ }
+
+ /* Parse and handle the return WMT event */
+ wmt_evt = (struct btmtk_hci_wmt_evt *)bdev->evt_skb->data;
+ if (wmt_evt->whdr.op != hdr->op) {
+ bt_dev_err(hdev, "Wrong op received %d expected %d",
+ wmt_evt->whdr.op, hdr->op);
+ err = -EIO;
+ goto err_free_skb;
+ }
+
+ /* Other ops leave status at BTMTK_WMT_INVALID */
+ switch (wmt_evt->whdr.op) {
+ case MTK_WMT_SEMAPHORE:
+ if (wmt_evt->whdr.flag == 2)
+ status = BTMTK_WMT_PATCH_UNDONE;
+ else
+ status = BTMTK_WMT_PATCH_DONE;
+ break;
+ case MTK_WMT_FUNC_CTRL:
+ wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt;
+ if (be16_to_cpu(wmt_evt_funcc->status) == 0x404)
+ status = BTMTK_WMT_ON_DONE;
+ else if (be16_to_cpu(wmt_evt_funcc->status) == 0x420)
+ status = BTMTK_WMT_ON_PROGRESS;
+ else
+ status = BTMTK_WMT_ON_UNDONE;
+ break;
+ }
+
+ if (wmt_params->status)
+ *wmt_params->status = status;
+
+ /* Reached on success too (err is 0 then): the captured event is
+ * always released here.
+ */
+err_free_skb:
+ kfree_skb(bdev->evt_skb);
+ bdev->evt_skb = NULL;
+
+ return err;
+}
+
+/* Prepend the MediaTek SDIO header to @skb and write it to the CTDR register.
+ *
+ * The transfer length is rounded up to a multiple of MTK_SDIO_BLOCK_SIZE.
+ * On success the skb is freed and 0 is returned. On failure a negative
+ * errno is returned and the skb is left without the SDIO header and is not
+ * freed here.
+ */
+static int btmtksdio_tx_packet(struct btmtksdio_dev *bdev,
+ struct sk_buff *skb)
+{
+ struct mtkbtsdio_hdr *sdio_hdr;
+ int err;
+
+ /* Make sure that there are enough rooms for SDIO header */
+ if (unlikely(skb_headroom(skb) < sizeof(*sdio_hdr))) {
+ err = pskb_expand_head(skb, sizeof(*sdio_hdr), 0,
+ GFP_ATOMIC);
+ if (err < 0)
+ return err;
+ }
+
+ /* Prepend MediaTek SDIO Specific Header */
+ skb_push(skb, sizeof(*sdio_hdr));
+
+ sdio_hdr = (void *)skb->data;
+ /* len covers the SDIO header itself plus the Bluetooth packet */
+ sdio_hdr->len = cpu_to_le16(skb->len);
+ sdio_hdr->reserved = cpu_to_le16(0);
+ sdio_hdr->bt_type = hci_skb_pkt_type(skb);
+
+ err = sdio_writesb(bdev->func, MTK_REG_CTDR, skb->data,
+ round_up(skb->len, MTK_SDIO_BLOCK_SIZE));
+ if (err < 0)
+ goto err_skb_pull;
+
+ bdev->hdev->stat.byte_tx += skb->len;
+
+ kfree_skb(skb);
+
+ return 0;
+
+err_skb_pull:
+ /* Undo the header push so the packet can be sent again unchanged */
+ skb_pull(skb, sizeof(*sdio_hdr));
+
+ return err;
+}
+
+/* Read the CHLPCR register; used as the poll operation of
+ * readx_poll_timeout() when waiting for the C_COM_DRV_OWN bit to change.
+ * Read errors are not reported (the error pointer is NULL).
+ */
+static u32 btmtksdio_drv_own_query(struct btmtksdio_dev *bdev)
+{
+	u32 chlpcr = sdio_readl(bdev->func, MTK_REG_CHLPCR, NULL);
+
+	return chlpcr;
+}
+
+/* Work item that drains the TX queue to the device.
+ *
+ * A runtime PM reference and the SDIO host are held while sending. If a
+ * packet fails to send it is put back at the head of the queue and draining
+ * stops.
+ */
+static void btmtksdio_tx_work(struct work_struct *work)
+{
+ struct btmtksdio_dev *bdev = container_of(work, struct btmtksdio_dev,
+ tx_work);
+ struct sk_buff *skb;
+ int err;
+
+ pm_runtime_get_sync(bdev->dev);
+
+ sdio_claim_host(bdev->func);
+
+ while ((skb = skb_dequeue(&bdev->txq))) {
+ err = btmtksdio_tx_packet(bdev, skb);
+ if (err < 0) {
+ bdev->hdev->stat.err_tx++;
+ /* Keep the packet at the front so ordering is preserved */
+ skb_queue_head(&bdev->txq, skb);
+ break;
+ }
+ }
+
+ sdio_release_host(bdev->func);
+
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+}
+
+/* Receive handler for HCI event packets.
+ *
+ * Rewrites the MediaTek vendor event id 0xe4 to HCI_EV_VENDOR, keeps a clone
+ * of the event in bdev->evt_skb while a WMT command is waiting for its reply,
+ * passes the event to the core and, for vendor events, wakes the waiter in
+ * mtk_hci_wmt_sync().
+ *
+ * Takes ownership of @skb on every path. Returns 0 on success, -ENOMEM if
+ * the event could not be cloned, or the error from hci_recv_frame().
+ */
+static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+	struct hci_event_hdr *hdr = (void *)skb->data;
+	u8 evt;
+	int err;
+
+	/* Fix up the vendor event id with 0xff for vendor specific instead
+	 * of 0xe4 so that event send via monitoring socket can be parsed
+	 * properly.
+	 */
+	if (hdr->evt == 0xe4)
+		hdr->evt = HCI_EV_VENDOR;
+
+	/* hci_recv_frame() consumes the skb, so hdr must not be dereferenced
+	 * after that call. Remember the event code now.
+	 */
+	evt = hdr->evt;
+
+	/* When someone waits for the WMT event, the skb is being cloned
+	 * and being processed the events from there then.
+	 */
+	if (test_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state)) {
+		bdev->evt_skb = skb_clone(skb, GFP_KERNEL);
+		if (!bdev->evt_skb) {
+			/* The skb is ours to release; nobody else will */
+			kfree_skb(skb);
+			err = -ENOMEM;
+			goto err_out;
+		}
+	}
+
+	err = hci_recv_frame(hdev, skb);
+	if (err < 0)
+		goto err_free_skb;
+
+	if (evt == HCI_EV_VENDOR) {
+		if (test_and_clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT,
+				       &bdev->tx_state)) {
+			/* Barrier to sync with other CPUs */
+			smp_mb__after_atomic();
+			wake_up_bit(&bdev->tx_state, BTMTKSDIO_TX_WAIT_VND_EVT);
+		}
+	}
+
+	return 0;
+
+err_free_skb:
+	kfree_skb(bdev->evt_skb);
+	bdev->evt_skb = NULL;
+
+err_out:
+	return err;
+}
+
+/* Packet types accepted from the device and the handler each one is passed
+ * to. btmtksdio_rx_packet() selects an entry by the bt_type found in the
+ * SDIO header.
+ */
+static const struct h4_recv_pkt mtk_recv_pkts[] = {
+ { H4_RECV_ACL, .recv = hci_recv_frame },
+ { H4_RECV_SCO, .recv = hci_recv_frame },
+ { H4_RECV_EVENT, .recv = btmtksdio_recv_event },
+};
+
+/* Read one packet of @rx_size bytes from the CRDR register and pass it up.
+ *
+ * The SDIO header is validated and removed, the payload length is taken from
+ * the Bluetooth packet's own header so trailing padding can be trimmed, and
+ * the frame is handed to the matching mtk_recv_pkts[] handler.
+ *
+ * Returns 0 on success, -ENOMEM if no skb could be allocated, the error from
+ * sdio_readsb(), or -EILSEQ for a malformed packet.
+ */
+static int btmtksdio_rx_packet(struct btmtksdio_dev *bdev, u16 rx_size)
+{
+ const struct h4_recv_pkt *pkts = mtk_recv_pkts;
+ int pkts_count = ARRAY_SIZE(mtk_recv_pkts);
+ struct mtkbtsdio_hdr *sdio_hdr;
+ int err, i, pad_size;
+ struct sk_buff *skb;
+ u16 dlen;
+
+ if (rx_size < sizeof(*sdio_hdr))
+ return -EILSEQ;
+
+ /* A SDIO packet is exactly containing a Bluetooth packet */
+ skb = bt_skb_alloc(rx_size, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_put(skb, rx_size);
+
+ err = sdio_readsb(bdev->func, skb->data, MTK_REG_CRDR, rx_size);
+ if (err < 0)
+ goto err_kfree_skb;
+
+ sdio_hdr = (void *)skb->data;
+
+ /* We assume the default error as -EILSEQ simply to make the error path
+ * be cleaner.
+ */
+ err = -EILSEQ;
+
+ if (rx_size != le16_to_cpu(sdio_hdr->len)) {
+ bt_dev_err(bdev->hdev, "Rx size in sdio header is mismatched ");
+ goto err_kfree_skb;
+ }
+
+ hci_skb_pkt_type(skb) = sdio_hdr->bt_type;
+
+ /* Remove MediaTek SDIO header */
+ skb_pull(skb, sizeof(*sdio_hdr));
+
+ /* We have to dig into the packet to get payload size and then know how
+ * many padding bytes at the tail, these padding bytes should be removed
+ * before the packet is indicated to the core layer.
+ */
+ for (i = 0; i < pkts_count; i++) {
+ if (sdio_hdr->bt_type == (&pkts[i])->type)
+ break;
+ }
+
+ if (i >= pkts_count) {
+ bt_dev_err(bdev->hdev, "Invalid bt type 0x%02x",
+ sdio_hdr->bt_type);
+ goto err_kfree_skb;
+ }
+
+ /* Remaining bytes cannot hold a header */
+ if (skb->len < (&pkts[i])->hlen) {
+ bt_dev_err(bdev->hdev, "The size of bt header is mismatched");
+ goto err_kfree_skb;
+ }
+
+ /* The length field is 1 or 2 bytes wide depending on the packet type */
+ switch ((&pkts[i])->lsize) {
+ case 1:
+ dlen = skb->data[(&pkts[i])->loff];
+ break;
+ case 2:
+ dlen = get_unaligned_le16(skb->data +
+ (&pkts[i])->loff);
+ break;
+ default:
+ goto err_kfree_skb;
+ }
+
+ pad_size = skb->len - (&pkts[i])->hlen - dlen;
+
+ /* Remaining bytes cannot hold a payload */
+ if (pad_size < 0) {
+ bt_dev_err(bdev->hdev, "The size of bt payload is mismatched");
+ goto err_kfree_skb;
+ }
+
+ /* Remove padding bytes */
+ skb_trim(skb, skb->len - pad_size);
+
+ /* Complete frame; the handler's return value is not checked here */
+ (&pkts[i])->recv(bdev->hdev, skb);
+
+ bdev->hdev->stat.byte_rx += rx_size;
+
+ return 0;
+
+err_kfree_skb:
+ kfree_skb(skb);
+
+ return err;
+}
+
+/* SDIO interrupt handler registered by btmtksdio_open().
+ *
+ * Interrupts are disabled in CHLPCR while CHISR is read, acknowledged and
+ * serviced, and re-enabled before returning. TX_EMPTY schedules the TX work
+ * and RX_DONE_INT reads one packet whose size comes from the RX_PKT_LEN
+ * field of CHISR. A runtime PM reference is held for the duration.
+ */
+static void btmtksdio_interrupt(struct sdio_func *func)
+{
+ struct btmtksdio_dev *bdev = sdio_get_drvdata(func);
+ u32 int_status;
+ u16 rx_size;
+
+ /* It is required that the host gets ownership from the device before
+ * accessing any register, however, if SDIO host is not being released,
+ * a potential deadlock probably happens in a circular wait between SDIO
+ * IRQ work and PM runtime work. So, we have to explicitly release SDIO
+ * host here and claim again after the PM runtime work is all done.
+ */
+ sdio_release_host(bdev->func);
+
+ pm_runtime_get_sync(bdev->dev);
+
+ sdio_claim_host(bdev->func);
+
+ /* Disable interrupt */
+ sdio_writel(func, C_INT_EN_CLR, MTK_REG_CHLPCR, 0);
+
+ int_status = sdio_readl(func, MTK_REG_CHISR, NULL);
+
+ /* Ack an interrupt as soon as possible before any operation on
+ * hardware.
+ *
+ * Note that we don't ack any status during operations to avoid race
+ * condition between the host and the device such as it's possible to
+ * mistakenly ack RX_DONE for the next packet and then cause interrupts
+ * not be raised again but there is still pending data in the hardware
+ * FIFO.
+ */
+ sdio_writel(func, int_status, MTK_REG_CHISR, NULL);
+
+ if (unlikely(!int_status))
+ bt_dev_err(bdev->hdev, "CHISR is 0");
+
+ if (int_status & FW_OWN_BACK_INT)
+ bt_dev_dbg(bdev->hdev, "Get fw own back");
+
+ if (int_status & TX_EMPTY)
+ schedule_work(&bdev->tx_work);
+ else if (unlikely(int_status & TX_FIFO_OVERFLOW))
+ bt_dev_warn(bdev->hdev, "Tx fifo overflow");
+
+ if (int_status & RX_DONE_INT) {
+ /* The packet length is reported in the upper 16 bits of CHISR */
+ rx_size = (int_status & RX_PKT_LEN) >> 16;
+
+ if (btmtksdio_rx_packet(bdev, rx_size) < 0)
+ bdev->hdev->stat.err_rx++;
+ }
+
+ /* Enable interrupt */
+ sdio_writel(func, C_INT_EN_SET, MTK_REG_CHLPCR, 0);
+
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+}
+
+/* hdev->open callback: enable the SDIO function, take ownership of the
+ * device for the driver, then configure and enable its interrupts.
+ *
+ * Returns 0 on success or a negative errno. On failure the IRQ (if already
+ * claimed) is released and the function is disabled again.
+ */
+static int btmtksdio_open(struct hci_dev *hdev)
+{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+ int err;
+ u32 status;
+
+ sdio_claim_host(bdev->func);
+
+ err = sdio_enable_func(bdev->func);
+ if (err < 0)
+ goto err_release_host;
+
+ /* Get ownership from the device */
+ sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto err_disable_func;
+
+ /* Poll CHLPCR every 2 ms, for up to 1 s, until C_COM_DRV_OWN is set */
+ err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+ status & C_COM_DRV_OWN, 2000, 1000000);
+ if (err < 0) {
+ bt_dev_err(bdev->hdev, "Cannot get ownership from device");
+ goto err_disable_func;
+ }
+
+ /* Disable interrupt & mask out all interrupt sources */
+ sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto err_disable_func;
+
+ sdio_writel(bdev->func, 0, MTK_REG_CHIER, &err);
+ if (err < 0)
+ goto err_disable_func;
+
+ err = sdio_claim_irq(bdev->func, btmtksdio_interrupt);
+ if (err < 0)
+ goto err_disable_func;
+
+ err = sdio_set_block_size(bdev->func, MTK_SDIO_BLOCK_SIZE);
+ if (err < 0)
+ goto err_release_irq;
+
+ /* SDIO CMD 5 allows the SDIO device back to idle state an
+ * synchronous interrupt is supported in SDIO 4-bit mode
+ */
+ sdio_writel(bdev->func, SDIO_INT_CTL | SDIO_RE_INIT_EN,
+ MTK_REG_CSDIOCSR, &err);
+ if (err < 0)
+ goto err_release_irq;
+
+ /* Setup write-1-clear for CHISR register */
+ sdio_writel(bdev->func, C_INT_CLR_CTRL, MTK_REG_CHCR, &err);
+ if (err < 0)
+ goto err_release_irq;
+
+ /* Setup interrupt sources */
+ sdio_writel(bdev->func, RX_DONE_INT | TX_EMPTY | TX_FIFO_OVERFLOW,
+ MTK_REG_CHIER, &err);
+ if (err < 0)
+ goto err_release_irq;
+
+ /* Enable interrupt */
+ sdio_writel(bdev->func, C_INT_EN_SET, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto err_release_irq;
+
+ sdio_release_host(bdev->func);
+
+ return 0;
+
+err_release_irq:
+ sdio_release_irq(bdev->func);
+
+err_disable_func:
+ sdio_disable_func(bdev->func);
+
+err_release_host:
+ sdio_release_host(bdev->func);
+
+ return err;
+}
+
+/* hdev->close callback: disable and release the interrupt, hand ownership
+ * back to the device and disable the SDIO function.
+ *
+ * Always returns 0; a failure to return ownership is only logged.
+ */
+static int btmtksdio_close(struct hci_dev *hdev)
+{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+ u32 status;
+ int err;
+
+ sdio_claim_host(bdev->func);
+
+ /* Disable interrupt */
+ sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, NULL);
+
+ sdio_release_irq(bdev->func);
+
+ /* Return ownership to the device */
+ sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, NULL);
+
+ /* Poll CHLPCR every 2 ms, for up to 1 s, until C_COM_DRV_OWN clears */
+ err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+ !(status & C_COM_DRV_OWN), 2000, 1000000);
+ if (err < 0)
+ bt_dev_err(bdev->hdev, "Cannot return ownership to device");
+
+ sdio_disable_func(bdev->func);
+
+ sdio_release_host(bdev->func);
+
+ return 0;
+}
+
+/* hdev->flush callback: drop every queued TX frame and wait for a running
+ * TX work item to finish. Always returns 0.
+ */
+static int btmtksdio_flush(struct hci_dev *hdev)
+{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+
+ skb_queue_purge(&bdev->txq);
+
+ cancel_work_sync(&bdev->tx_work);
+
+ return 0;
+}
+
+/* Ask the device whether the Bluetooth function is enabled.
+ *
+ * Sends MTK_WMT_FUNC_CTRL with flag 4 and a single zero parameter byte.
+ *
+ * Returns the BTMTK_WMT_ON_* status decoded by mtk_hci_wmt_sync(), or a
+ * negative errno if the command failed. Used as the poll operation of
+ * readx_poll_timeout() in btmtksdio_setup().
+ */
+static int btmtksdio_func_query(struct hci_dev *hdev)
+{
+	struct btmtk_hci_wmt_params wmt_params;
+	int status, err;
+	u8 param = 0;
+
+	/* Query whether the function is enabled */
+	wmt_params.op = MTK_WMT_FUNC_CTRL;
+	wmt_params.flag = 4;
+	wmt_params.dlen = sizeof(param);
+	wmt_params.data = &param;
+	wmt_params.status = &status;
+
+	err = mtk_hci_wmt_sync(hdev, &wmt_params);
+	if (err < 0) {
+		bt_dev_err(hdev, "Failed to query function status (%d)", err);
+		return err;
+	}
+
+	return status;
+}
+
+/* Download the firmware image @fwname to the device and activate it.
+ *
+ * The first 30 bytes of the image are skipped; the rest is sent in chunks of
+ * up to 250 bytes with MTK_WMT_PATCH_DWNLD, followed by MTK_WMT_RST.
+ *
+ * Returns 0 on success, -EINVAL if the image is shorter than 30 bytes, or
+ * the error from request_firmware() or mtk_hci_wmt_sync().
+ */
+static int mtk_setup_firmware(struct hci_dev *hdev, const char *fwname)
+{
+ struct btmtk_hci_wmt_params wmt_params;
+ const struct firmware *fw;
+ const u8 *fw_ptr;
+ size_t fw_size;
+ int err, dlen;
+ u8 flag;
+
+ err = request_firmware(&fw, fwname, &hdev->dev);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to load firmware file (%d)", err);
+ return err;
+ }
+
+ fw_ptr = fw->data;
+ fw_size = fw->size;
+
+ /* The patch header is 30 bytes long and must be skipped */
+ if (fw_size < 30) {
+ err = -EINVAL;
+ goto free_fw;
+ }
+
+ fw_size -= 30;
+ fw_ptr += 30;
+ flag = 1;
+
+ wmt_params.op = MTK_WMT_PATCH_DWNLD;
+ wmt_params.status = NULL;
+
+ while (fw_size > 0) {
+ dlen = min_t(int, 250, fw_size);
+
+ /* Tell device the position in sequence: flag stays 1 for the
+ * first chunk, becomes 2 for following chunks and 3 for the
+ * last one.
+ */
+ if (fw_size - dlen <= 0)
+ flag = 3;
+ else if (fw_size < fw->size - 30)
+ flag = 2;
+
+ wmt_params.flag = flag;
+ wmt_params.dlen = dlen;
+ wmt_params.data = fw_ptr;
+
+ err = mtk_hci_wmt_sync(hdev, &wmt_params);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send wmt patch dwnld (%d)",
+ err);
+ goto free_fw;
+ }
+
+ fw_size -= dlen;
+ fw_ptr += dlen;
+ }
+
+ wmt_params.op = MTK_WMT_RST;
+ wmt_params.flag = 4;
+ wmt_params.dlen = 0;
+ wmt_params.data = NULL;
+ wmt_params.status = NULL;
+
+ /* Activate the function provided by the firmware */
+ err = mtk_hci_wmt_sync(hdev, &wmt_params);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send wmt rst (%d)", err);
+ goto free_fw;
+ }
+
+ /* Wait a few moments for firmware activation done */
+ usleep_range(10000, 12000);
+
+free_fw:
+ release_firmware(fw);
+ return err;
+}
+
+/* hdev->setup callback: bring the controller to an operational state.
+ *
+ * Downloads the firmware unless the device reports it is already patched,
+ * enables the Bluetooth function unless it is already on, applies the low
+ * power settings and finally configures runtime PM for the SDIO function.
+ *
+ * Returns 0 on success or a negative errno from the failing step.
+ */
+static int btmtksdio_setup(struct hci_dev *hdev)
+{
+	struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+	struct btmtk_hci_wmt_params wmt_params;
+	ktime_t calltime, delta, rettime;
+	struct btmtk_tci_sleep tci_sleep;
+	unsigned long long duration;
+	struct sk_buff *skb;
+	int err, status;
+	u8 param = 0x1;
+
+	calltime = ktime_get();
+
+	/* Query whether the firmware is already download */
+	wmt_params.op = MTK_WMT_SEMAPHORE;
+	wmt_params.flag = 1;
+	wmt_params.dlen = 0;
+	wmt_params.data = NULL;
+	wmt_params.status = &status;
+
+	err = mtk_hci_wmt_sync(hdev, &wmt_params);
+	if (err < 0) {
+		bt_dev_err(hdev, "Failed to query firmware status (%d)", err);
+		return err;
+	}
+
+	if (status == BTMTK_WMT_PATCH_DONE) {
+		bt_dev_info(hdev, "Firmware already downloaded");
+		goto ignore_setup_fw;
+	}
+
+	/* Setup a firmware which the device definitely requires */
+	err = mtk_setup_firmware(hdev, bdev->data->fwname);
+	if (err < 0)
+		return err;
+
+ignore_setup_fw:
+	/* Query whether the device is already enabled. Polling stops as soon
+	 * as the query fails or reports anything but "in progress".
+	 */
+	err = readx_poll_timeout(btmtksdio_func_query, hdev, status,
+				 status < 0 || status != BTMTK_WMT_ON_PROGRESS,
+				 2000, 5000000);
+	/* -ETIMEDOUT happens */
+	if (err < 0)
+		return err;
+
+	/* The other errors happen in btmtksdio_func_query */
+	if (status < 0)
+		return status;
+
+	if (status == BTMTK_WMT_ON_DONE) {
+		bt_dev_info(hdev, "function already on");
+		goto ignore_func_on;
+	}
+
+	/* Enable Bluetooth protocol */
+	wmt_params.op = MTK_WMT_FUNC_CTRL;
+	wmt_params.flag = 0;
+	wmt_params.dlen = sizeof(param);
+	wmt_params.data = &param;
+	wmt_params.status = NULL;
+
+	err = mtk_hci_wmt_sync(hdev, &wmt_params);
+	if (err < 0) {
+		bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err);
+		return err;
+	}
+
+ignore_func_on:
+	/* Apply the low power environment setup */
+	tci_sleep.mode = 0x5;
+	tci_sleep.duration = cpu_to_le16(0x640);
+	tci_sleep.host_duration = cpu_to_le16(0x640);
+	tci_sleep.host_wakeup_pin = 0;
+	tci_sleep.time_compensation = 0;
+
+	skb = __hci_cmd_sync(hdev, 0xfc7a, sizeof(tci_sleep), &tci_sleep,
+			     HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(skb);
+		bt_dev_err(hdev, "Failed to apply low power setting (%d)", err);
+		return err;
+	}
+	kfree_skb(skb);
+
+	/* ns >> 10 is the approximation of microseconds used in the log */
+	rettime = ktime_get();
+	delta = ktime_sub(rettime, calltime);
+	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
+
+	pm_runtime_set_autosuspend_delay(bdev->dev,
+					 MTKBTSDIO_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(bdev->dev);
+
+	err = pm_runtime_set_active(bdev->dev);
+	if (err < 0)
+		return err;
+
+	/* Default forbid runtime auto suspend, that can be allowed by
+	 * enable_autosuspend flag or the PM runtime entry under sysfs.
+	 */
+	pm_runtime_forbid(bdev->dev);
+	pm_runtime_enable(bdev->dev);
+
+	if (enable_autosuspend)
+		pm_runtime_allow(bdev->dev);
+
+	bt_dev_info(hdev, "Device setup in %llu usecs", duration);
+
+	return 0;
+}
+
+/* hdev->shutdown callback: turn the Bluetooth function off and disable
+ * runtime PM again.
+ *
+ * Sends MTK_WMT_FUNC_CTRL with a zero parameter byte. Returns 0 on success
+ * or the error from mtk_hci_wmt_sync().
+ */
+static int btmtksdio_shutdown(struct hci_dev *hdev)
+{
+	struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+	struct btmtk_hci_wmt_params wmt_params;
+	u8 param = 0x0;
+	int err;
+
+	/* Get back the state to be consistent with the state
+	 * in btmtksdio_setup.
+	 */
+	pm_runtime_get_sync(bdev->dev);
+
+	/* Disable the device */
+	wmt_params.op = MTK_WMT_FUNC_CTRL;
+	wmt_params.flag = 0;
+	wmt_params.dlen = sizeof(param);
+	wmt_params.data = &param;
+	wmt_params.status = NULL;
+
+	err = mtk_hci_wmt_sync(hdev, &wmt_params);
+	if (err < 0) {
+		bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err);
+		return err;
+	}
+
+	pm_runtime_put_noidle(bdev->dev);
+	pm_runtime_disable(bdev->dev);
+
+	return 0;
+}
+
+/* hdev->send callback: count the frame by type, queue it and schedule the
+ * TX work that does the actual transfer.
+ *
+ * Returns 0 once the frame is queued, or -EILSEQ for a packet type other
+ * than command, ACL data or SCO data (the frame is not queued then).
+ */
+static int btmtksdio_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+
+ switch (hci_skb_pkt_type(skb)) {
+ case HCI_COMMAND_PKT:
+ hdev->stat.cmd_tx++;
+ break;
+
+ case HCI_ACLDATA_PKT:
+ hdev->stat.acl_tx++;
+ break;
+
+ case HCI_SCODATA_PKT:
+ hdev->stat.sco_tx++;
+ break;
+
+ default:
+ return -EILSEQ;
+ }
+
+ skb_queue_tail(&bdev->txq, skb);
+
+ schedule_work(&bdev->tx_work);
+
+ return 0;
+}
+
+/* SDIO probe: allocate the driver state, then allocate, configure and
+ * register the HCI device for @func.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -ENODEV if the id
+ * table entry carries no driver data, or the error from hci_register_dev().
+ */
+static int btmtksdio_probe(struct sdio_func *func,
+ const struct sdio_device_id *id)
+{
+ struct btmtksdio_dev *bdev;
+ struct hci_dev *hdev;
+ int err;
+
+ bdev = devm_kzalloc(&func->dev, sizeof(*bdev), GFP_KERNEL);
+ if (!bdev)
+ return -ENOMEM;
+
+ bdev->data = (void *)id->driver_data;
+ if (!bdev->data)
+ return -ENODEV;
+
+ bdev->dev = &func->dev;
+ bdev->func = func;
+
+ INIT_WORK(&bdev->tx_work, btmtksdio_tx_work);
+ skb_queue_head_init(&bdev->txq);
+
+ /* Initialize and register HCI device */
+ hdev = hci_alloc_dev();
+ if (!hdev) {
+ dev_err(&func->dev, "Can't allocate HCI device\n");
+ return -ENOMEM;
+ }
+
+ bdev->hdev = hdev;
+
+ hdev->bus = HCI_SDIO;
+ hci_set_drvdata(hdev, bdev);
+
+ hdev->open = btmtksdio_open;
+ hdev->close = btmtksdio_close;
+ hdev->flush = btmtksdio_flush;
+ hdev->setup = btmtksdio_setup;
+ hdev->shutdown = btmtksdio_shutdown;
+ hdev->send = btmtksdio_send_frame;
+ SET_HCIDEV_DEV(hdev, &func->dev);
+
+ hdev->manufacturer = 70;
+ set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ dev_err(&func->dev, "Can't register HCI device\n");
+ hci_free_dev(hdev);
+ return err;
+ }
+
+ sdio_set_drvdata(func, bdev);
+
+ /* pm_runtime_enable would be done after the firmware is being
+ * downloaded because the core layer probably already enables
+ * runtime PM for this func such as the case host->caps &
+ * MMC_CAP_POWER_OFF_CARD.
+ */
+ if (pm_runtime_enabled(bdev->dev))
+ pm_runtime_disable(bdev->dev);
+
+ /* As the explanation in drivers/mmc/core/sdio_bus.c tells us:
+ * Unbound SDIO functions are always suspended.
+ * During probe, the function is set active and the usage count
+ * is incremented. If the driver supports runtime PM,
+ * it should call pm_runtime_put_noidle() in its probe routine and
+ * pm_runtime_get_noresume() in its remove routine.
+ *
+ * So, put a pm_runtime_put_noidle here !
+ */
+ pm_runtime_put_noidle(bdev->dev);
+
+ return 0;
+}
+
+/* SDIO remove: undo btmtksdio_probe() by rebalancing the runtime PM usage
+ * count, clearing the driver data and unregistering and freeing the HCI
+ * device. Does nothing if no driver data is attached to @func.
+ */
+static void btmtksdio_remove(struct sdio_func *func)
+{
+ struct btmtksdio_dev *bdev = sdio_get_drvdata(func);
+ struct hci_dev *hdev;
+
+ if (!bdev)
+ return;
+
+ /* Be consistent the state in btmtksdio_probe */
+ pm_runtime_get_noresume(bdev->dev);
+
+ hdev = bdev->hdev;
+
+ sdio_set_drvdata(func, NULL);
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
+}
+
+#ifdef CONFIG_PM
+/* PM suspend callback: hand ownership back to the device.
+ *
+ * Writes C_FW_OWN_REQ_SET to CHLPCR and polls (every 2 ms, up to 1 s) until
+ * C_COM_DRV_OWN reads back as clear. Returns 0 on success or when no driver
+ * data is attached, otherwise the error from the write or the poll.
+ */
+static int btmtksdio_runtime_suspend(struct device *dev)
+{
+ struct sdio_func *func = dev_to_sdio_func(dev);
+ struct btmtksdio_dev *bdev;
+ u32 status;
+ int err;
+
+ bdev = sdio_get_drvdata(func);
+ if (!bdev)
+ return 0;
+
+ sdio_claim_host(bdev->func);
+
+ sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto out;
+
+ err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+ !(status & C_COM_DRV_OWN), 2000, 1000000);
+out:
+ /* Logged on success as well, with status 0 */
+ bt_dev_info(bdev->hdev, "status (%d) return ownership to device", err);
+
+ sdio_release_host(bdev->func);
+
+ return err;
+}
+
+/* PM resume callback: take ownership of the device for the driver.
+ *
+ * Writes C_FW_OWN_REQ_CLR to CHLPCR and polls (every 2 ms, up to 1 s) until
+ * C_COM_DRV_OWN reads back as set. Returns 0 on success or when no driver
+ * data is attached, otherwise the error from the write or the poll.
+ */
+static int btmtksdio_runtime_resume(struct device *dev)
+{
+ struct sdio_func *func = dev_to_sdio_func(dev);
+ struct btmtksdio_dev *bdev;
+ u32 status;
+ int err;
+
+ bdev = sdio_get_drvdata(func);
+ if (!bdev)
+ return 0;
+
+ sdio_claim_host(bdev->func);
+
+ sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto out;
+
+ err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+ status & C_COM_DRV_OWN, 2000, 1000000);
+out:
+ /* Logged on success as well, with status 0 */
+ bt_dev_info(bdev->hdev, "status (%d) get ownership from device", err);
+
+ sdio_release_host(bdev->func);
+
+ return err;
+}
+
+static UNIVERSAL_DEV_PM_OPS(btmtksdio_pm_ops, btmtksdio_runtime_suspend,
+ btmtksdio_runtime_resume, NULL);
+#define BTMTKSDIO_PM_OPS (&btmtksdio_pm_ops)
+#else /* CONFIG_PM */
+#define BTMTKSDIO_PM_OPS NULL
+#endif /* CONFIG_PM */
+
+static struct sdio_driver btmtksdio_driver = {
+ .name = "btmtksdio",
+ .probe = btmtksdio_probe,
+ .remove = btmtksdio_remove,
+ .id_table = btmtksdio_table,
+ .drv = {
+ .owner = THIS_MODULE,
+ .pm = BTMTKSDIO_PM_OPS,
+ }
+};
+
+module_sdio_driver(btmtksdio_driver);
+
+module_param(enable_autosuspend, bool, 0644);
+MODULE_PARM_DESC(enable_autosuspend, "Enable autosuspend by default");
+
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek Bluetooth SDIO driver ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(FIRMWARE_MT7663);
+MODULE_FIRMWARE(FIRMWARE_MT7668);
{
struct btmtkuart_dev *bdev = hci_get_drvdata(hdev);
struct btmtk_hci_wmt_params wmt_params;
- u32 baudrate;
+ __le32 baudrate;
u8 param;
int err;
#define QCA_WCN3990_POWERON_PULSE 0xFC
#define QCA_WCN3990_POWEROFF_PULSE 0xC0
-enum qca_bardrate {
+enum qca_baudrate {
QCA_BAUDRATE_115200 = 0,
QCA_BAUDRATE_57600,
QCA_BAUDRATE_38400,
.id_table = btsdio_table,
};
-static int __init btsdio_init(void)
-{
- BT_INFO("Generic Bluetooth SDIO driver ver %s", VERSION);
-
- return sdio_register_driver(&btsdio_driver);
-}
-
-static void __exit btsdio_exit(void)
-{
- sdio_unregister_driver(&btsdio_driver);
-}
-
-module_init(btsdio_init);
-module_exit(btsdio_exit);
+module_sdio_driver(btsdio_driver);
MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
MODULE_DESCRIPTION("Generic Bluetooth SDIO driver ver " VERSION);
return 0;
}
+ irq_set_status_flags(irq, IRQ_NOAUTOEN);
ret = devm_request_irq(&hdev->dev, irq, btusb_oob_wake_handler,
0, "OOB Wake-on-BT", data);
if (ret) {
}
data->oob_wake_irq = irq;
- disable_irq(irq);
bt_dev_info(hdev, "OOB Wake-on-BT configured at IRQ %u", irq);
return 0;
}
int err;
if (powered && !dev->res_enabled) {
- err = regulator_bulk_enable(BCM_NUM_SUPPLIES, dev->supplies);
- if (err)
- return err;
+ /* Intel Macs use bcm_apple_get_resources() and don't
+ * have regulator supplies configured.
+ */
+ if (dev->supplies[0].supply) {
+ err = regulator_bulk_enable(BCM_NUM_SUPPLIES,
+ dev->supplies);
+ if (err)
+ return err;
+ }
/* LPO clock needs to be 32.768 kHz */
err = clk_set_rate(dev->lpo_clk, 32768);
if (!powered && dev->res_enabled) {
clk_disable_unprepare(dev->txco_clk);
clk_disable_unprepare(dev->lpo_clk);
- regulator_bulk_disable(BCM_NUM_SUPPLIES, dev->supplies);
+
+ /* Intel Macs use bcm_apple_get_resources() and don't
+ * have regulator supplies configured.
+ */
+ if (dev->supplies[0].supply)
+ regulator_bulk_disable(BCM_NUM_SUPPLIES,
+ dev->supplies);
}
/* wait for device to power on and come out of reset */
skb_put_data(h5->rx_skb, byte, 1);
h5->rx_pending--;
- BT_DBG("unsliped 0x%02hhx, rx_pending %zu", *byte, h5->rx_pending);
+ BT_DBG("unslipped 0x%02hhx, rx_pending %zu", *byte, h5->rx_pending);
}
static void h5_reset_rx(struct h5 *h5)
static void qca_power_shutdown(struct hci_uart *hu);
static int qca_power_off(struct hci_dev *hdev);
+/* Return the SoC type for @hu: the btsoc_type stored in the serdev driver
+ * data when the UART is backed by a serdev device, QCA_ROME otherwise.
+ */
+static enum qca_btsoc_type qca_soc_type(struct hci_uart *hu)
+{
+	struct qca_serdev *qcadev;
+
+	/* No serdev attached: fall back to QCA_ROME */
+	if (!hu->serdev)
+		return QCA_ROME;
+
+	qcadev = serdev_device_get_drvdata(hu->serdev);
+
+	return qcadev->btsoc_type;
+}
+
static void __serial_clock_on(struct tty_struct *tty)
{
/* TODO: Some chipset requires to enable UART clock on client
qcadev = serdev_device_get_drvdata(hu->serdev);
if (qcadev->btsoc_type != QCA_WCN3990) {
gpiod_set_value_cansleep(qcadev->bt_en, 1);
+ /* Controller needs time to bootup. */
+ msleep(150);
} else {
hu->init_speed = qcadev->init_speed;
hu->oper_speed = qcadev->oper_speed;
{
struct hci_uart *hu = hci_get_drvdata(hdev);
struct qca_data *qca = hu->priv;
- struct qca_serdev *qcadev;
struct sk_buff *skb;
u8 cmd[] = { 0x01, 0x48, 0xFC, 0x01, 0x00 };
skb_queue_tail(&qca->txq, skb);
hci_uart_tx_wakeup(hu);
- qcadev = serdev_device_get_drvdata(hu->serdev);
-
/* Wait for the baudrate change request to be sent */
while (!skb_queue_empty(&qca->txq))
usleep_range(100, 200);
- serdev_device_wait_until_sent(hu->serdev,
+ if (hu->serdev)
+ serdev_device_wait_until_sent(hu->serdev,
msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));
/* Give the controller time to process the request */
- if (qcadev->btsoc_type == QCA_WCN3990)
+ if (qca_soc_type(hu) == QCA_WCN3990)
msleep(10);
else
msleep(300);
static int qca_check_speeds(struct hci_uart *hu)
{
- struct qca_serdev *qcadev;
-
- qcadev = serdev_device_get_drvdata(hu->serdev);
- if (qcadev->btsoc_type == QCA_WCN3990) {
+ if (qca_soc_type(hu) == QCA_WCN3990) {
if (!qca_get_speed(hu, QCA_INIT_SPEED) &&
!qca_get_speed(hu, QCA_OPER_SPEED))
return -EINVAL;
static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
{
unsigned int speed, qca_baudrate;
- struct qca_serdev *qcadev;
int ret = 0;
if (speed_type == QCA_INIT_SPEED) {
if (speed)
host_set_baudrate(hu, speed);
} else {
+ enum qca_btsoc_type soc_type = qca_soc_type(hu);
+
speed = qca_get_speed(hu, QCA_OPER_SPEED);
if (!speed)
return 0;
/* Disable flow control for wcn3990 to deassert RTS while
* changing the baudrate of chip and host.
*/
- qcadev = serdev_device_get_drvdata(hu->serdev);
- if (qcadev->btsoc_type == QCA_WCN3990)
+ if (soc_type == QCA_WCN3990)
hci_uart_set_flow_control(hu, true);
qca_baudrate = qca_get_baudrate_value(speed);
host_set_baudrate(hu, speed);
error:
- if (qcadev->btsoc_type == QCA_WCN3990)
+ if (soc_type == QCA_WCN3990)
hci_uart_set_flow_control(hu, false);
}
struct hci_dev *hdev = hu->hdev;
struct qca_data *qca = hu->priv;
unsigned int speed, qca_baudrate = QCA_BAUDRATE_115200;
- struct qca_serdev *qcadev;
+ enum qca_btsoc_type soc_type = qca_soc_type(hu);
int ret;
int soc_ver = 0;
- qcadev = serdev_device_get_drvdata(hu->serdev);
-
ret = qca_check_speeds(hu);
if (ret)
return ret;
/* Patch downloading has to be done without IBS mode */
clear_bit(STATE_IN_BAND_SLEEP_ENABLED, &qca->flags);
- if (qcadev->btsoc_type == QCA_WCN3990) {
+ if (soc_type == QCA_WCN3990) {
bt_dev_info(hdev, "setting up wcn3990");
/* Enable NON_PERSISTENT_SETUP QUIRK to ensure to execute
qca_baudrate = qca_get_baudrate_value(speed);
}
- if (qcadev->btsoc_type != QCA_WCN3990) {
+ if (soc_type != QCA_WCN3990) {
/* Get QCA version information */
ret = qca_read_soc_version(hdev, &soc_ver);
if (ret)
bt_dev_info(hdev, "QCA controller version 0x%08x", soc_ver);
/* Setup patch / NVM configurations */
- ret = qca_uart_setup(hdev, qca_baudrate, qcadev->btsoc_type, soc_ver);
+ ret = qca_uart_setup(hdev, qca_baudrate, soc_type, soc_ver);
if (!ret) {
set_bit(STATE_IN_BAND_SLEEP_ENABLED, &qca->flags);
qca_debugfs_init(hdev);
}
/* Setup bdaddr */
- if (qcadev->btsoc_type == QCA_WCN3990)
+ if (soc_type == QCA_WCN3990)
hu->hdev->set_bdaddr = qca_set_bdaddr;
else
hu->hdev->set_bdaddr = qca_set_bdaddr_rome;
return;
}
- memset(&p, 0, sizeof(p));
p.addr = base_addr;
p.space = space;
p.regspacing = offset;
/* Does this interface receive IPMI events? */
bool gets_events;
+
+ /* Free must run in process context for RCU cleanup. */
+ struct work_struct remove_work;
};
static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index)
return rv;
}
+/*
+ * Deferred destructor for an ipmi_user.
+ *
+ * Runs from the work item embedded in the user (remove_work): tears down the
+ * user's release_barrier SRCU state and then frees the user itself.
+ */
+static void free_user_work(struct work_struct *work)
+{
+	struct ipmi_user *dead;
+
+	dead = container_of(work, struct ipmi_user, remove_work);
+	cleanup_srcu_struct(&dead->release_barrier);
+	kfree(dead);
+}
+
int ipmi_create_user(unsigned int if_num,
const struct ipmi_user_hndl *handler,
void *handler_data,
goto out_kfree;
found:
+ INIT_WORK(&new_user->remove_work, free_user_work);
+
rv = init_srcu_struct(&new_user->release_barrier);
if (rv)
goto out_kfree;
static void free_user(struct kref *ref)
{
struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
- cleanup_srcu_struct(&user->release_barrier);
- kfree(user);
+
+ /* SRCU cleanup must happen in task context. */
+ schedule_work(&user->remove_work);
}
static void _ipmi_destroy_user(struct ipmi_user *user)
char *str;
char *si_type[SI_MAX_PARMS];
+ memset(si_type, 0, sizeof(si_type));
+
/* Parse out the si_type string into its components. */
str = si_type_str;
if (*str != '\0') {
*
* Returns size of the event. If it is an invalid event, returns 0.
*/
-static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
- struct tcg_pcr_event *event_header)
+static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ struct tcg_pcr_event *event_header)
{
struct tcg_efi_specid_event_head *efispecid;
struct tcg_event_field *event_field;
__poll_t mask = 0;
poll_wait(file, &priv->async_wait, wait);
+ mutex_lock(&priv->buffer_mutex);
- if (!priv->response_read || priv->response_length)
+ /*
+ * The response_length indicates if there is still response
+ * (or part of it) to be consumed. Partial reads decrease it
+ * by the number of bytes read, and write resets it to zero.
+ */
+ if (priv->response_length)
mask = EPOLLIN | EPOLLRDNORM;
else
mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_unlock(&priv->buffer_mutex);
return mask;
}
if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
return 0;
- if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- mutex_lock(&chip->tpm_mutex);
- if (!tpm_chip_start(chip)) {
+ if (!tpm_chip_start(chip)) {
+ if (chip->flags & TPM_CHIP_FLAG_TPM2)
tpm2_shutdown(chip, TPM2_SU_STATE);
- tpm_chip_stop(chip);
- }
- mutex_unlock(&chip->tpm_mutex);
- } else {
- rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+ else
+ rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+
+ tpm_chip_stop(chip);
}
return rc;
#define PROG_ID_MAX 7
#define PROG_STATUS_MASK(id) (1 << ((id) + 8))
-#define PROG_PRES_MASK 0x7
-#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & PROG_PRES_MASK)
+#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & layout->pres_mask)
#define PROG_MAX_RM9200_CSS 3
struct clk_programmable {
unsigned long parent_rate)
{
struct clk_programmable *prog = to_clk_programmable(hw);
+ const struct clk_programmable_layout *layout = prog->layout;
unsigned int pckr;
+ unsigned long rate;
regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
- return parent_rate >> PROG_PRES(prog->layout, pckr);
+ if (layout->is_pres_direct)
+ rate = parent_rate / (PROG_PRES(layout, pckr) + 1);
+ else
+ rate = parent_rate >> PROG_PRES(layout, pckr);
+
+ return rate;
}
static int clk_programmable_determine_rate(struct clk_hw *hw,
struct clk_rate_request *req)
{
+ struct clk_programmable *prog = to_clk_programmable(hw);
+ const struct clk_programmable_layout *layout = prog->layout;
struct clk_hw *parent;
long best_rate = -EINVAL;
unsigned long parent_rate;
- unsigned long tmp_rate;
+ unsigned long tmp_rate = 0;
int shift;
int i;
continue;
parent_rate = clk_hw_get_rate(parent);
- for (shift = 0; shift < PROG_PRES_MASK; shift++) {
- tmp_rate = parent_rate >> shift;
- if (tmp_rate <= req->rate)
- break;
+ if (layout->is_pres_direct) {
+ for (shift = 0; shift <= layout->pres_mask; shift++) {
+ tmp_rate = parent_rate / (shift + 1);
+ if (tmp_rate <= req->rate)
+ break;
+ }
+ } else {
+ for (shift = 0; shift < layout->pres_mask; shift++) {
+ tmp_rate = parent_rate >> shift;
+ if (tmp_rate <= req->rate)
+ break;
+ }
}
if (tmp_rate > req->rate)
if (!div)
return -EINVAL;
- shift = fls(div) - 1;
+ if (layout->is_pres_direct) {
+ shift = div - 1;
- if (div != (1 << shift))
- return -EINVAL;
+ if (shift > layout->pres_mask)
+ return -EINVAL;
+ } else {
+ shift = fls(div) - 1;
- if (shift >= PROG_PRES_MASK)
- return -EINVAL;
+ if (div != (1 << shift))
+ return -EINVAL;
+
+ if (shift >= layout->pres_mask)
+ return -EINVAL;
+ }
regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id),
- PROG_PRES_MASK << layout->pres_shift,
+ layout->pres_mask << layout->pres_shift,
shift << layout->pres_shift);
return 0;
}
const struct clk_programmable_layout at91rm9200_programmable_layout = {
+ .pres_mask = 0x7,
.pres_shift = 2,
.css_mask = 0x3,
.have_slck_mck = 0,
+ .is_pres_direct = 0,
};
const struct clk_programmable_layout at91sam9g45_programmable_layout = {
+ .pres_mask = 0x7,
.pres_shift = 2,
.css_mask = 0x3,
.have_slck_mck = 1,
+ .is_pres_direct = 0,
};
const struct clk_programmable_layout at91sam9x5_programmable_layout = {
+ .pres_mask = 0x7,
.pres_shift = 4,
.css_mask = 0x7,
.have_slck_mck = 0,
+ .is_pres_direct = 0,
};
};
struct clk_programmable_layout {
+ u8 pres_mask;
u8 pres_shift;
u8 css_mask;
u8 have_slck_mck;
+ u8 is_pres_direct;
};
extern const struct clk_programmable_layout at91rm9200_programmable_layout;
.pll = true },
};
+/*
+ * Programmable clock register layout registered by sama5d2_pmc_setup().
+ * The prescaler field is 8 bits wide (pres_mask 0xff) and is_pres_direct is
+ * set, which selects the "parent_rate / (PRES + 1)" rate computation instead
+ * of the "parent_rate >> PRES" one used by the other layouts.
+ */
+static const struct clk_programmable_layout sama5d2_programmable_layout = {
+ .pres_mask = 0xff,
+ .pres_shift = 4,
+ .css_mask = 0x7,
+ .have_slck_mck = 0,
+ .is_pres_direct = 1,
+};
+
static void __init sama5d2_pmc_setup(struct device_node *np)
{
struct clk_range range = CLK_RANGE(0, 0);
hw = at91_clk_register_programmable(regmap, name,
parent_names, 6, i,
- &at91sam9x5_programmable_layout);
+ &sama5d2_programmable_layout);
if (IS_ERR(hw))
goto err_free;
}
switch (pll_clk->type) {
case PLL_1416X:
- if (!pll->rate_table)
+ if (!pll_clk->rate_table)
init.ops = &clk_pll1416x_min_ops;
else
init.ops = &clk_pll1416x_ops;
return ERR_PTR(-ENOMEM);
init.name = name;
- init.flags = CLK_SET_RATE_PARENT;
+ init.flags = flags | CLK_SET_RATE_PARENT;
init.parent_names = parent_name ? &parent_name : NULL;
init.num_parents = parent_name ? 1 : 0;
init.ops = ops;
- init.flags = flags;
cg->regmap = regmap;
cg->set_ofs = set_ofs;
return true;
} else {
/* Round down */
- if (now < rate && best < now)
+ if (now <= rate && best < now)
return true;
}
/* VPU Clock */
static const char * const g12a_vpu_parent_names[] = {
- "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7",
+ "fclk_div3", "fclk_div4", "fclk_div5", "fclk_div7",
"mpll1", "vid_pll", "hifi_pll", "gp0_pll",
};
static struct clk_regmap g12a_vpu_0_sel = {
.data = &(struct clk_regmap_mux_data){
.offset = HHI_VPU_CLK_CNTL,
- .mask = 0x3,
+ .mask = 0x7,
.shift = 9,
},
.hw.init = &(struct clk_init_data){
static struct clk_regmap g12a_vpu_1_sel = {
.data = &(struct clk_regmap_mux_data){
.offset = HHI_VPU_CLK_CNTL,
- .mask = 0x3,
+ .mask = 0x7,
.shift = 25,
},
.hw.init = &(struct clk_init_data){
.offset = HHI_VDEC_CLK_CNTL,
.shift = 0,
.width = 7,
+ .flags = CLK_DIVIDER_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "vdec_1_div",
.offset = HHI_VDEC2_CLK_CNTL,
.shift = 16,
.width = 7,
+ .flags = CLK_DIVIDER_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "vdec_hevc_div",
div = _get_table_val(meson_parm_read(clk->map, &pll_div->val),
meson_parm_read(clk->map, &pll_div->sel));
if (!div || !div->divider) {
- pr_info("%s: Invalid config value for vid_pll_div\n", __func__);
- return parent_rate;
+ pr_debug("%s: Invalid config value for vid_pll_div\n", __func__);
+ return 0;
}
return DIV_ROUND_UP_ULL(parent_rate * div->multiplier, div->divider);
};
static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
- void __iomem *base,
+ const struct pmc_clk_data *pmc_data,
const char **parent_names,
int num_parents)
{
init.num_parents = num_parents;
pclk->hw.init = &init;
- pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
+ pclk->reg = pmc_data->base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
spin_lock_init(&pclk->lock);
+ /*
+ * On some systems, the pmc_plt_clocks already enabled by the
+ * firmware are being marked as critical to avoid them being
+ * gated by the clock framework.
+ */
+ if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw))
+ init.flags |= CLK_IS_CRITICAL;
+
ret = devm_clk_hw_register(&pdev->dev, &pclk->hw);
if (ret) {
pclk = ERR_PTR(ret);
return PTR_ERR(parent_names);
for (i = 0; i < PMC_CLK_NUM; i++) {
- data->clks[i] = plt_clk_register(pdev, i, pmc_data->base,
+ data->clks[i] = plt_clk_register(pdev, i, pmc_data,
parent_names, data->nparents);
if (IS_ERR(data->clks[i])) {
err = PTR_ERR(data->clks[i]);
config NPCM7XX_TIMER
bool "NPCM7xx timer driver" if COMPILE_TEST
depends on HAS_IOMEM
+ select TIMER_OF
select CLKSRC_MMIO
help
Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture,
* published by the Free Software Foundation.
*/
-#define pr_fmt(fmt) "arm_arch_timer: " fmt
+#define pr_fmt(fmt) "arch_timer: " fmt
#include <linux/init.h>
#include <linux/kernel.h>
#include <clocksource/arm_arch_timer.h>
-#undef pr_fmt
-#define pr_fmt(fmt) "arch_timer: " fmt
-
#define CNTTIDR 0x08
#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
TIMER_OF_DECLARE(ox810se_rps,
"oxsemi,ox810se-rps-timer", oxnas_rps_timer_init);
TIMER_OF_DECLARE(ox820_rps,
- "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init);
+ "oxsemi,ox820-rps-timer", oxnas_rps_timer_init);
return 0;
}
-/* Optimized set_load which removes costly spin wait in timer_start */
-static int omap_dm_timer_set_load_start(struct omap_dm_timer *timer,
- int autoreload, unsigned int load)
-{
- u32 l;
-
- if (unlikely(!timer))
- return -EINVAL;
-
- omap_dm_timer_enable(timer);
-
- l = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
- if (autoreload) {
- l |= OMAP_TIMER_CTRL_AR;
- omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
- } else {
- l &= ~OMAP_TIMER_CTRL_AR;
- }
- l |= OMAP_TIMER_CTRL_ST;
-
- __omap_dm_timer_load_start(timer, l, load, timer->posted);
-
- /* Save the context */
- timer->context.tclr = l;
- timer->context.tldr = load;
- timer->context.tcrr = load;
- return 0;
-}
static int omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable,
unsigned int match)
{
if (ret)
goto unmap_ctx;
- if (mapped_nents) {
+ if (mapped_nents)
sg_to_sec4_sg_last(req->src, mapped_nents,
edesc->sec4_sg + sec4_sg_src_index,
0);
- if (*next_buflen)
- scatterwalk_map_and_copy(next_buf, req->src,
- to_hash - *buflen,
- *next_buflen, 0);
- } else {
+ else
sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
1);
- }
+ if (*next_buflen)
+ scatterwalk_map_and_copy(next_buf, req->src,
+ to_hash - *buflen,
+ *next_buflen, 0);
desc = edesc->hw_desc;
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
config EXTCON_PTN5150
tristate "NXP PTN5150 CC LOGIC USB EXTCON support"
- depends on I2C && GPIOLIB || COMPILE_TEST
+ depends on I2C && (GPIOLIB || COMPILE_TEST)
select REGMAP_I2C
help
Say Y here to enable support for USB peripheral and USB host
/* No need to recover an evicted BO */
if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
+ shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
continue;
break;
if (fence) {
- r = dma_fence_wait_timeout(fence, false, tmo);
+ tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
fence = next;
- if (r <= 0)
+ if (tmo == 0) {
+ r = -ETIMEDOUT;
break;
+ } else if (tmo < 0) {
+ r = tmo;
+ break;
+ }
} else {
fence = next;
}
tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
- if (r <= 0 || tmo <= 0) {
- DRM_ERROR("recover vram bo from shadow failed\n");
+ if (r < 0 || tmo <= 0) {
+ DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
return -EIO;
}
#include "amdgpu_trace.h"
#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
+#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000)
/*
* IB
* cost waiting for it coming back under RUNTIME only
*/
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ } else if (adev->gmc.xgmi.hive_id) {
+ tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
}
for (i = 0; i < adev->num_rings; ++i) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
{ 0x9876, &carrizo_device_info }, /* Carrizo */
{ 0x9877, &carrizo_device_info }, /* Carrizo */
{ 0x15DD, &raven_device_info }, /* Raven */
+ { 0x15D8, &raven_device_info }, /* Raven */
#endif
{ 0x67A0, &hawaii_device_info }, /* Hawaii */
{ 0x67A1, &hawaii_device_info }, /* Hawaii */
amdgpu_crtc->cursor_width = plane->state->crtc_w;
amdgpu_crtc->cursor_height = plane->state->crtc_h;
+ memset(&attributes, 0, sizeof(attributes));
attributes.address.high_part = upper_32_bits(address);
attributes.address.low_part = lower_32_bits(address);
attributes.width = plane->state->crtc_w;
return UPDATE_TYPE_FULL;
}
+ if (u->surface->force_full_update) {
+ update_flags->bits.full_update = 1;
+ return UPDATE_TYPE_FULL;
+ }
+
type = get_plane_info_update_type(u);
elevate_update_type(&overall_type, type);
}
dc_resource_state_copy_construct(state, context);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state)
+ new_pipe->plane_state->force_full_update = true;
+ }
}
dc->current_state = context;
dc_release_state(old);
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
+ pipe_ctx->plane_state->force_full_update = false;
+ }
}
/*let's use current_state to update watermark etc*/
if (update_type >= UPDATE_TYPE_FULL)
struct dc_plane_status status;
struct dc_context *ctx;
+ /* HACK: Workaround for forcing full reprogramming under some conditions */
+ bool force_full_update;
+
/* private to dc_surface.c */
enum dc_irq_source irq_source;
struct kref refcount;
1,
0);
}
+
+ REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1);
+
+ REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0,
+ 10, aux110->timeout_period/10);
+
/* set the delay and the number of bytes to write */
/* The length include
}
}
- REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1);
- REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0,
- 10, aux110->timeout_period/10);
REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1);
}
* at most within ~240usec. That means,
* increasing this timeout will not affect normal operation,
* and we'll timeout after
- * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec.
+ * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 2400usec.
* This timeout is especially important for
- * resume from S3 and CTS.
+ * converters, resume from S3, and CTS.
*/
- SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4
+ SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 6
};
struct dce_aux {
REG_UPDATE(CURSOR_CONTROL,
CURSOR_ENABLE, cur_en);
- //account for cases where we see negative offset relative to overlay plane
- if (src_x_offset < 0 && src_y_offset < 0) {
- REG_SET_2(CURSOR_POSITION, 0,
- CURSOR_X_POSITION, 0,
- CURSOR_Y_POSITION, 0);
- x_hotspot -= src_x_offset;
- y_hotspot -= src_y_offset;
- } else if (src_x_offset < 0) {
- REG_SET_2(CURSOR_POSITION, 0,
- CURSOR_X_POSITION, 0,
- CURSOR_Y_POSITION, pos->y);
- x_hotspot -= src_x_offset;
- } else if (src_y_offset < 0) {
- REG_SET_2(CURSOR_POSITION, 0,
+ REG_SET_2(CURSOR_POSITION, 0,
CURSOR_X_POSITION, pos->x,
- CURSOR_Y_POSITION, 0);
- y_hotspot -= src_y_offset;
- } else {
- REG_SET_2(CURSOR_POSITION, 0,
- CURSOR_X_POSITION, pos->x,
- CURSOR_Y_POSITION, pos->y);
- }
+ CURSOR_Y_POSITION, pos->y);
REG_SET_2(CURSOR_HOT_SPOT, 0,
CURSOR_HOT_SPOT_X, x_hotspot,
}
EXPORT_SYMBOL_GPL(dw_hdmi_phy_i2c_write);
+/*
+ * Tell whether SCDC and scrambling may be configured for the current sink.
+ *
+ * Returns false for controllers older than version 0x200a, for sinks that do
+ * not report both SCDC and scrambling support, and for sinks that are limited
+ * to TMDS clocks of at most 340000 without low-rate scrambling support.
+ */
+static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi)
+{
+	struct drm_display_info *info = &hdmi->connector.display_info;
+
+	/* Older controllers never get SCDC */
+	if (hdmi->version < 0x200a)
+		return false;
+
+	/* Both SCDC and scrambling have to be reported by the sink */
+	if (!(info->hdmi.scdc.supported &&
+	      info->hdmi.scdc.scrambling.supported))
+		return false;
+
+	/*
+	 * Usable when scrambling works at low rates, or when the sink goes
+	 * beyond the low TMDS rates at all.
+	 */
+	return info->hdmi.scdc.scrambling.low_rates ||
+	       info->max_tmds_clock > 340000;
+}
+
/*
* HDMI2.0 Specifies the following procedure for High TMDS Bit Rates:
* - The Source shall suspend transmission of the TMDS clock and data
unsigned long mtmdsclock = hdmi->hdmi_data.video_mode.mtmdsclock;
/* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
- if (hdmi->connector.display_info.hdmi.scdc.supported) {
+ if (dw_hdmi_support_scdc(hdmi)) {
if (mtmdsclock > HDMI14_MAX_TMDSCLK)
drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1);
else
/* Set up HDMI_FC_INVIDCONF */
inv_val = (hdmi->hdmi_data.hdcp_enable ||
- vmode->mtmdsclock > HDMI14_MAX_TMDSCLK ||
- hdmi_info->scdc.scrambling.low_rates ?
+ (dw_hdmi_support_scdc(hdmi) &&
+ (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK ||
+ hdmi_info->scdc.scrambling.low_rates)) ?
HDMI_FC_INVIDCONF_HDCP_KEEPOUT_ACTIVE :
HDMI_FC_INVIDCONF_HDCP_KEEPOUT_INACTIVE);
}
/* Scrambling Control */
- if (hdmi_info->scdc.supported) {
+ if (dw_hdmi_support_scdc(hdmi)) {
if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK ||
hdmi_info->scdc.scrambling.low_rates) {
/*
funcs->atomic_disable(crtc, old_crtc_state);
else if (funcs->disable)
funcs->disable(crtc);
- else
+ else if (funcs->dpms)
funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
if (!(dev->irq_enabled && dev->num_crtcs))
if (new_crtc_state->enable) {
DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n",
crtc->base.id, crtc->name);
-
if (funcs->atomic_enable)
funcs->atomic_enable(crtc, old_crtc_state);
- else
+ else if (funcs->commit)
funcs->commit(crtc);
}
}
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_vgpu_primary_plane_format p;
struct intel_vgpu_cursor_plane_format c;
- int ret;
+ int ret, tile_height = 1;
if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
ret = intel_vgpu_decode_primary_plane(vgpu, &p);
break;
case PLANE_CTL_TILED_X:
info->drm_format_mod = I915_FORMAT_MOD_X_TILED;
+ tile_height = 8;
break;
case PLANE_CTL_TILED_Y:
info->drm_format_mod = I915_FORMAT_MOD_Y_TILED;
+ tile_height = 32;
break;
case PLANE_CTL_TILED_YF:
info->drm_format_mod = I915_FORMAT_MOD_Yf_TILED;
+ tile_height = 32;
break;
default:
gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled);
return -EINVAL;
}
- info->size = (info->stride * info->height + PAGE_SIZE - 1)
- >> PAGE_SHIFT;
+ info->size = (info->stride * roundup(info->height, tile_height)
+ + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (info->size == 0) {
gvt_vgpu_err("fb size is zero\n");
return -EINVAL;
static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
- struct intel_vgpu_ppgtt_spt *spt;
+ struct intel_vgpu_ppgtt_spt *spt, *spn;
struct radix_tree_iter iter;
- void **slot;
+ LIST_HEAD(all_spt);
+ void __rcu **slot;
+ rcu_read_lock();
radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
spt = radix_tree_deref_slot(slot);
- ppgtt_free_spt(spt);
+ list_move(&spt->post_shadow_list, &all_spt);
}
+ rcu_read_unlock();
+
+ list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
+ ppgtt_free_spt(spt);
}
static int ppgtt_handle_guest_write_page_table_bytes(
static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
void *buf, unsigned long count, bool is_write)
{
- void *aperture_va;
+ void __iomem *aperture_va;
if (!intel_vgpu_in_aperture(vgpu, off) ||
!intel_vgpu_in_aperture(vgpu, off + count)) {
return -EIO;
if (is_write)
- memcpy(aperture_va + offset_in_page(off), buf, count);
+ memcpy_toio(aperture_va + offset_in_page(off), buf, count);
else
- memcpy(buf, aperture_va + offset_in_page(off), count);
+ memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
io_mapping_unmap(aperture_va);
}
}
+/*
+ * Take the DDI IO power domain reference for every port used by @intel_dsi
+ * and remember the returned wakeref in intel_dsi->io_wakeref[port].
+ *
+ * PORT_A uses POWER_DOMAIN_PORT_DDI_A_IO, any other DSI port uses
+ * POWER_DOMAIN_PORT_DDI_B_IO. A wakeref that is already set for a port
+ * triggers a warning before it is overwritten.
+ */
+static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv,
+				     struct intel_dsi *intel_dsi)
+{
+	enum port port;
+
+	for_each_dsi_port(port, intel_dsi->ports) {
+		enum intel_display_power_domain domain;
+
+		if (port == PORT_A)
+			domain = POWER_DOMAIN_PORT_DDI_A_IO;
+		else
+			domain = POWER_DOMAIN_PORT_DDI_B_IO;
+
+		WARN_ON(intel_dsi->io_wakeref[port]);
+		intel_dsi->io_wakeref[port] =
+			intel_display_power_get(dev_priv, domain);
+	}
+}
+
static void gen11_dsi_enable_io_power(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
I915_WRITE(ICL_DSI_IO_MODECTL(port), tmp);
}
- for_each_dsi_port(port, intel_dsi->ports) {
- intel_dsi->io_wakeref[port] =
- intel_display_power_get(dev_priv,
- port == PORT_A ?
- POWER_DOMAIN_PORT_DDI_A_IO :
- POWER_DOMAIN_PORT_DDI_B_IO);
- }
+ get_dsi_io_power_domains(dev_priv, intel_dsi);
}
static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder)
val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port);
}
I915_WRITE(DPCLKA_CFGCR0_ICL, val);
+
+ for_each_dsi_port(port, intel_dsi->ports) {
+ val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port);
+ }
+ I915_WRITE(DPCLKA_CFGCR0_ICL, val);
+
POSTING_READ(DPCLKA_CFGCR0_ICL);
mutex_unlock(&dev_priv->dpll_lock);
DRM_ERROR("DDI port:%c buffer not idle\n",
port_name(port));
}
- gen11_dsi_ungate_clocks(encoder);
+ gen11_dsi_gate_clocks(encoder);
}
static void gen11_dsi_disable_io_power(struct intel_encoder *encoder)
return 0;
}
-static u64 gen11_dsi_get_power_domains(struct intel_encoder *encoder,
- struct intel_crtc_state *crtc_state)
+static void gen11_dsi_get_power_domains(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state)
{
- struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
- u64 domains = 0;
- enum port port;
-
- for_each_dsi_port(port, intel_dsi->ports)
- if (port == PORT_A)
- domains |= BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO);
- else
- domains |= BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO);
-
- return domains;
+ get_dsi_io_power_domains(to_i915(encoder->base.dev),
+ enc_to_intel_dsi(&encoder->base));
}
static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
intel_aux_power_domain(dig_port);
}
-static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder,
- struct intel_crtc_state *crtc_state)
+static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_digital_port *dig_port;
- u64 domains;
/*
* TODO: Add support for MST encoders. Atm, the following should never
* hook.
*/
if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)))
- return 0;
+ return;
dig_port = enc_to_dig_port(&encoder->base);
- domains = BIT_ULL(dig_port->ddi_io_power_domain);
+ intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
/*
* AUX power is only needed for (e)DP mode, and for HDMI mode on TC
*/
if (intel_crtc_has_dp_encoder(crtc_state) ||
intel_port_is_tc(dev_priv, encoder->port))
- domains |= BIT_ULL(intel_ddi_main_link_aux_domain(dig_port));
+ intel_display_power_get(dev_priv,
+ intel_ddi_main_link_aux_domain(dig_port));
/*
* VDSC power is needed when DSC is enabled
*/
if (crtc_state->dsc_params.compression_enable)
- domains |= BIT_ULL(intel_dsc_power_domain(crtc_state));
-
- return domains;
+ intel_display_power_get(dev_priv,
+ intel_dsc_power_domain(crtc_state));
}
void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state)
return;
}
/*
- * DSI ports should have their DDI clock ungated when disabled
- * and gated when enabled.
+ * For DSI we keep the ddi clocks gated
+ * except during enable/disable sequence.
*/
- ddi_clk_needed = !encoder->base.crtc;
+ ddi_clk_needed = false;
}
val = I915_READ(DPCLKA_CFGCR0_ICL);
struct intel_encoder *encoder;
for_each_intel_encoder(&dev_priv->drm, encoder) {
- u64 get_domains;
- enum intel_display_power_domain domain;
struct intel_crtc_state *crtc_state;
if (!encoder->get_power_domains)
continue;
crtc_state = to_intel_crtc_state(encoder->base.crtc->state);
- get_domains = encoder->get_power_domains(encoder, crtc_state);
- for_each_power_domain(domain, get_domains)
- intel_display_power_get(dev_priv, domain);
+ encoder->get_power_domains(encoder, crtc_state);
}
}
return -EINVAL;
}
-/* Optimize link config in order: max bpp, min lanes, min clock */
-static int
-intel_dp_compute_link_config_fast(struct intel_dp *intel_dp,
- struct intel_crtc_state *pipe_config,
- const struct link_config_limits *limits)
-{
- struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
- int bpp, clock, lane_count;
- int mode_rate, link_clock, link_avail;
-
- for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) {
- mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
- bpp);
-
- for (lane_count = limits->min_lane_count;
- lane_count <= limits->max_lane_count;
- lane_count <<= 1) {
- for (clock = limits->min_clock; clock <= limits->max_clock; clock++) {
- link_clock = intel_dp->common_rates[clock];
- link_avail = intel_dp_max_data_rate(link_clock,
- lane_count);
-
- if (mode_rate <= link_avail) {
- pipe_config->lane_count = lane_count;
- pipe_config->pipe_bpp = bpp;
- pipe_config->port_clock = link_clock;
-
- return 0;
- }
- }
- }
- }
-
- return -EINVAL;
-}
-
static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
{
int i, num_bpc;
limits.min_bpp = 6 * 3;
limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config);
- if (intel_dp_is_edp(intel_dp) && intel_dp->edp_dpcd[0] < DP_EDP_14) {
+ if (intel_dp_is_edp(intel_dp)) {
/*
* Use the maximum clock and number of lanes the eDP panel
- * advertizes being capable of. The eDP 1.3 and earlier panels
- * are generally designed to support only a single clock and
- * lane configuration, and typically these values correspond to
- * the native resolution of the panel. With eDP 1.4 rate select
- * and DSC, this is decreasingly the case, and we need to be
- * able to select less than maximum link config.
+ * advertizes being capable of. The panels are generally
+ * designed to support only a single clock and lane
+ * configuration, and typically these values correspond to the
+ * native resolution of the panel.
*/
limits.min_lane_count = limits.max_lane_count;
limits.min_clock = limits.max_clock;
intel_dp->common_rates[limits.max_clock],
limits.max_bpp, adjusted_mode->crtc_clock);
- if (intel_dp_is_edp(intel_dp))
- /*
- * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4
- * section A.1: "It is recommended that the minimum number of
- * lanes be used, using the minimum link rate allowed for that
- * lane configuration."
- *
- * Note that we use the max clock and lane count for eDP 1.3 and
- * earlier, and fast vs. wide is irrelevant.
- */
- ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config,
- &limits);
- else
- /* Optimize for slow and wide. */
- ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config,
- &limits);
+ /*
+ * Optimize for slow and wide. This is the place to add alternative
+ * optimization policy.
+ */
+ ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
/* enable compression if the mode doesn't fit available BW */
DRM_DEBUG_KMS("Force DSC en = %d\n", intel_dp->force_dsc_en);
* be set correctly before calling this function. */
void (*get_config)(struct intel_encoder *,
struct intel_crtc_state *pipe_config);
- /* Returns a mask of power domains that need to be referenced as part
- * of the hardware state readout code. */
- u64 (*get_power_domains)(struct intel_encoder *encoder,
- struct intel_crtc_state *crtc_state);
+ /*
+ * Acquires the power domains needed for an active encoder during
+ * hardware state readout.
+ */
+ void (*get_power_domains)(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state);
/*
* Called during system suspend after all pending requests for the
* encoder are flushed (for example for DP AUX transactions) and
bool *enabled, int width, int height)
{
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
+ unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
- unsigned long conn_configured, conn_seq;
int i, j;
bool *save_enabled;
bool fallback = true, ret = true;
drm_modeset_backoff(&ctx);
memcpy(save_enabled, enabled, count);
- conn_seq = GENMASK(count - 1, 0);
+ mask = GENMASK(count - 1, 0);
conn_configured = 0;
retry:
+ conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
if (conn_configured & BIT(i))
continue;
- /* First pass, only consider tiled connectors */
- if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
+ if (conn_seq == 0 && !connector->has_tile)
continue;
if (connector->status == connector_status_connected)
conn_configured |= BIT(i);
}
- if (conn_configured != conn_seq) { /* repeat until no more are found */
- conn_seq = conn_configured;
+ if ((conn_configured & mask) != mask && conn_configured != conn_seq)
goto retry;
- }
/*
* If the BIOS didn't enable everything it could, fall back to have the
mutex_unlock(&dev_priv->sb_lock);
}
+/*
+ * Translate the dither BPC field of the pipe's PIPEMISC register into a
+ * bits-per-pixel value (three components per pixel: 6/8/10/12 bpc give
+ * 18/24/30/36). An unrecognised field value is reported through
+ * MISSING_CASE() and yields 0.
+ */
+static int bdw_get_pipemisc_bpp(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 tmp = I915_READ(PIPEMISC(crtc->pipe));
+	int bpp = 0;
+
+	switch (tmp & PIPEMISC_DITHER_BPC_MASK) {
+	case PIPEMISC_DITHER_6_BPC:
+		bpp = 18;
+		break;
+	case PIPEMISC_DITHER_8_BPC:
+		bpp = 24;
+		break;
+	case PIPEMISC_DITHER_10_BPC:
+		bpp = 30;
+		break;
+	case PIPEMISC_DITHER_12_BPC:
+		bpp = 36;
+		break;
+	default:
+		MISSING_CASE(tmp);
+		break;
+	}
+
+	return bpp;
+}
+
static int intel_dsi_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state)
bpp = mipi_dsi_pixel_format_to_bpp(
pixel_format_from_register_bits(fmt));
+ pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc);
+
/* Enable Frame time stamo based scanline reporting */
adjusted_mode->private_flags |=
I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP;
static unsigned int mt2701_calculate_factor(int clock)
{
if (clock <= 64000)
- return 16;
- else if (clock <= 128000)
- return 8;
- else if (clock <= 256000)
return 4;
- else
+ else if (clock <= 128000)
return 2;
+ else
+ return 1;
}
static const struct mtk_dpi_conf mt8173_conf = {
#include <drm/drmP.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_fb_helper.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_of.h>
.gem_prime_get_sg_table = mtk_gem_prime_get_sg_table,
.gem_prime_import_sg_table = mtk_gem_prime_import_sg_table,
.gem_prime_mmap = mtk_drm_gem_mmap_buf,
+ .gem_prime_vmap = mtk_drm_gem_prime_vmap,
+ .gem_prime_vunmap = mtk_drm_gem_prime_vunmap,
.fops = &mtk_drm_fops,
.name = DRIVER_NAME,
if (ret < 0)
goto err_deinit;
+ ret = drm_fbdev_generic_setup(drm, 32);
+ if (ret)
+ DRM_ERROR("Failed to initialize fbdev: %d\n", ret);
+
return 0;
err_deinit:
kfree(mtk_gem);
return ERR_PTR(ret);
}
+
+/**
+ * mtk_drm_gem_prime_vmap - map a GEM object's pages into kernel address space
+ * @obj: GEM object to map
+ *
+ * Returns the cached mapping when one already exists. Otherwise the pages
+ * described by the object's scatter/gather table are collected into
+ * mtk_gem->pages, mapped write-combined with vmap() and the resulting
+ * address is cached in mtk_gem->kvaddr.
+ *
+ * Return: kernel virtual address of the mapping, or NULL on failure.
+ */
+void *mtk_drm_gem_prime_vmap(struct drm_gem_object *obj)
+{
+	struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj);
+	struct sg_table *sgt;
+	struct sg_page_iter iter;
+	unsigned int npages;
+	unsigned int i = 0;
+
+	if (mtk_gem->kvaddr)
+		return mtk_gem->kvaddr;
+
+	sgt = mtk_gem_prime_get_sg_table(obj);
+	if (IS_ERR(sgt))
+		return NULL;
+
+	npages = obj->size >> PAGE_SHIFT;
+	mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL);
+	if (!mtk_gem->pages)
+		goto out;
+
+	for_each_sg_page(sgt->sgl, &iter, sgt->orig_nents, 0) {
+		/*
+		 * pages[] holds exactly npages entries: check the bound
+		 * before storing so the array is never overrun.
+		 */
+		if (i >= npages)
+			break;
+		mtk_gem->pages[i++] = sg_page_iter_page(&iter);
+	}
+
+	mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP,
+			       pgprot_writecombine(PAGE_KERNEL));
+	if (!mtk_gem->kvaddr) {
+		/* Do not keep a page array around without a mapping. */
+		kfree(mtk_gem->pages);
+		mtk_gem->pages = NULL;
+	}
+
+out:
+	/*
+	 * NOTE(review): only the table header is freed here; if
+	 * mtk_gem_prime_get_sg_table() also allocates the scatterlist
+	 * entries, sg_free_table() is needed first - confirm.
+	 */
+	kfree(sgt);
+
+	return mtk_gem->kvaddr;
+}
+
+/**
+ * mtk_drm_gem_prime_vunmap - undo mtk_drm_gem_prime_vmap()
+ * @obj: GEM object that was mapped
+ * @vaddr: kernel virtual address returned by the vmap call
+ *
+ * Does nothing when no page array is recorded for the object. Otherwise the
+ * mapping is removed and both the cached address and the page array are
+ * released and cleared, so that a repeated call is a no-op instead of a
+ * double free.
+ */
+void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+	struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj);
+
+	if (!mtk_gem->pages)
+		return;
+
+	vunmap(vaddr);
+	mtk_gem->kvaddr = NULL;
+	kfree(mtk_gem->pages);
+	/* Clear the pointer so the guard above reflects the unmapped state. */
+	mtk_gem->pages = NULL;
+}
dma_addr_t dma_addr;
unsigned long dma_attrs;
struct sg_table *sg;
+ struct page **pages;
};
#define to_mtk_gem_obj(x) container_of(x, struct mtk_drm_gem_obj, base)
struct sg_table *mtk_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach, struct sg_table *sg);
+void *mtk_drm_gem_prime_vmap(struct drm_gem_object *obj);
+void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
#endif
if (IS_ERR(regmap))
ret = PTR_ERR(regmap);
if (ret) {
- ret = PTR_ERR(regmap);
dev_err(dev,
"Failed to get system configuration registers: %d\n",
ret);
of_node_put(remote);
hdmi->ddc_adpt = of_find_i2c_adapter_by_node(i2c_np);
+ of_node_put(i2c_np);
if (!hdmi->ddc_adpt) {
dev_err(dev, "Failed to get ddc i2c adapter by node\n");
return -EINVAL;
.owner = THIS_MODULE,
};
-long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *parent_rate)
-{
- struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
-
- hdmi_phy->pll_rate = rate;
- if (rate <= 74250000)
- *parent_rate = rate;
- else
- *parent_rate = rate / 2;
-
- return rate;
-}
-
-unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw,
- unsigned long parent_rate)
-{
- struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
-
- return hdmi_phy->pll_rate;
-}
-
void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset,
u32 bits)
{
return NULL;
}
-static void mtk_hdmi_phy_clk_get_ops(struct mtk_hdmi_phy *hdmi_phy,
- const struct clk_ops **ops)
+static void mtk_hdmi_phy_clk_get_data(struct mtk_hdmi_phy *hdmi_phy,
+ struct clk_init_data *clk_init)
{
- if (hdmi_phy && hdmi_phy->conf && hdmi_phy->conf->hdmi_phy_clk_ops)
- *ops = hdmi_phy->conf->hdmi_phy_clk_ops;
- else
- dev_err(hdmi_phy->dev, "Failed to get clk ops of phy\n");
+ clk_init->flags = hdmi_phy->conf->flags;
+ clk_init->ops = hdmi_phy->conf->hdmi_phy_clk_ops;
}
static int mtk_hdmi_phy_probe(struct platform_device *pdev)
struct clk_init_data clk_init = {
.num_parents = 1,
.parent_names = (const char * const *)&ref_clk_name,
- .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE,
};
struct phy *phy;
hdmi_phy->dev = dev;
hdmi_phy->conf =
(struct mtk_hdmi_phy_conf *)of_device_get_match_data(dev);
- mtk_hdmi_phy_clk_get_ops(hdmi_phy, &clk_init.ops);
+ mtk_hdmi_phy_clk_get_data(hdmi_phy, &clk_init);
hdmi_phy->pll_hw.init = &clk_init;
hdmi_phy->pll = devm_clk_register(dev, &hdmi_phy->pll_hw);
if (IS_ERR(hdmi_phy->pll)) {
struct mtk_hdmi_phy_conf {
bool tz_disabled;
+ unsigned long flags;
const struct clk_ops *hdmi_phy_clk_ops;
void (*hdmi_phy_enable_tmds)(struct mtk_hdmi_phy *hdmi_phy);
void (*hdmi_phy_disable_tmds)(struct mtk_hdmi_phy *hdmi_phy);
void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset,
u32 val, u32 mask);
struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw);
-long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *parent_rate);
-unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw,
- unsigned long parent_rate);
extern struct platform_driver mtk_hdmi_phy_driver;
extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf;
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK);
usleep_range(80, 100);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN);
- mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK);
- mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN);
usleep_range(80, 100);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK);
usleep_range(80, 100);
}
+static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long *parent_rate)
+{
+ return rate; /* requested rate is returned unchanged; *parent_rate is not touched */
+}
+
static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
unsigned long parent_rate)
{
if (rate <= 64000000)
pos_div = 3;
- else if (rate <= 12800000)
- pos_div = 1;
+ else if (rate <= 128000000)
+ pos_div = 2;
else
pos_div = 1;
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_PREDIV_MASK);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_POSDIV_MASK);
+ mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV);
mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IC),
RG_HTPLL_IC_MASK);
mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IR),
return 0;
}
+static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
+ unsigned long out_rate, val;
+ /*
+ * Derive the output rate from parent_rate and the divider fields read
+ * back from HDMI_CON6 and HDMI_CON2.
+ *
+ * Pre-divider field: 0 keeps the parent rate, 1 halves it, any other
+ * value divides it by 4.
+ */
+ val = (readl(hdmi_phy->regs + HDMI_CON6)
+ & RG_HTPLL_PREDIV_MASK) >> RG_HTPLL_PREDIV;
+ switch (val) {
+ case 0x00:
+ out_rate = parent_rate;
+ break;
+ case 0x01:
+ out_rate = parent_rate / 2;
+ break;
+ default:
+ out_rate = parent_rate / 4;
+ break;
+ }
+ /*
+ * Feedback divider field multiplies by (field + 1) * 2; the TX post
+ * divider field is then applied as a right shift.
+ */
+ val = (readl(hdmi_phy->regs + HDMI_CON6)
+ & RG_HTPLL_FBKDIV_MASK) >> RG_HTPLL_FBKDIV;
+ out_rate *= (val + 1) * 2;
+ val = (readl(hdmi_phy->regs + HDMI_CON2)
+ & RG_HDMITX_TX_POSDIV_MASK);
+ out_rate >>= (val >> RG_HDMITX_TX_POSDIV);
+ /* With RG_HDMITX_EN_TX_POSDIV set, the rate is divided by a further 5. */
+ if (readl(hdmi_phy->regs + HDMI_CON2) & RG_HDMITX_EN_TX_POSDIV)
+ out_rate /= 5;
+
+ return out_rate;
+}
+
static const struct clk_ops mtk_hdmi_phy_pll_ops = {
.prepare = mtk_hdmi_pll_prepare,
.unprepare = mtk_hdmi_pll_unprepare,
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK);
usleep_range(80, 100);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN);
- mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK);
- mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN);
usleep_range(80, 100);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK);
struct mtk_hdmi_phy_conf mtk_hdmi_phy_2701_conf = {
.tz_disabled = true,
+ .flags = CLK_SET_RATE_GATE,
.hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops,
.hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds,
.hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds,
usleep_range(100, 150);
}
+static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long *parent_rate)
+{
+ struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
+ /*
+ * Record the requested rate in hdmi_phy->pll_rate and return it
+ * unchanged. The parent is asked for the same rate up to 74250000 Hz
+ * and for half the rate above that.
+ */
+ hdmi_phy->pll_rate = rate;
+ if (rate <= 74250000)
+ *parent_rate = rate;
+ else
+ *parent_rate = rate / 2;
+
+ return rate;
+}
+
static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
unsigned long parent_rate)
{
return 0;
}
+static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
+ /* parent_rate is ignored: report the rate cached in hdmi_phy->pll_rate. */
+ return hdmi_phy->pll_rate;
+}
+
static const struct clk_ops mtk_hdmi_phy_pll_ops = {
.prepare = mtk_hdmi_pll_prepare,
.unprepare = mtk_hdmi_pll_unprepare,
}
struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf = {
+ .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE,
.hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops,
.hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds,
.hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds,
REG_FLD_MOD(core->base, HDMI_CORE_SYS_INTR_UNMASK4, 0, 3, 3);
hdmi_wp_clear_irqenable(core->wp, HDMI_IRQ_CORE);
hdmi_wp_set_irqstatus(core->wp, HDMI_IRQ_CORE);
+ REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0);
hdmi4_core_disable(core);
return 0;
}
if (err)
return err;
+ /*
+ * Initialize CEC clock divider: CEC needs 2MHz clock hence
+ * set the divider to 24 to get 48/24=2MHz clock
+ */
+ REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0);
+
/* Clear TX FIFO */
if (!hdmi_cec_clear_tx_fifo(adap)) {
pr_err("cec-%s: could not clear TX FIFO\n", adap->name);
- return -EIO;
+ err = -EIO;
+ goto err_disable_clk;
}
/* Clear RX FIFO */
if (!hdmi_cec_clear_rx_fifo(adap)) {
pr_err("cec-%s: could not clear RX FIFO\n", adap->name);
- return -EIO;
+ err = -EIO;
+ goto err_disable_clk;
}
/* Clear CEC interrupts */
hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, temp);
}
return 0;
+
+err_disable_clk:
+ REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0);
+ hdmi4_core_disable(core);
+
+ return err;
}
static int hdmi_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr)
return ret;
core->wp = wp;
- /*
- * Initialize CEC clock divider: CEC needs 2MHz clock hence
- * set the devider to 24 to get 48/24=2MHz clock
- */
- REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0);
+ /* Disable clock initially, hdmi_cec_adap_enable() manages it */
+ REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0);
ret = cec_register_adapter(core->adap, &pdev->dev);
if (ret < 0) {
else
acore.i2s_cfg.justification = HDMI_AUDIO_JUSTIFY_RIGHT;
/*
- * The I2S input word length is twice the lenght given in the IEC-60958
+ * The I2S input word length is twice the length given in the IEC-60958
* status word. If the word size is greater than
* 20 bits, increment by one.
*/
sun8i_dw_hdmi_mode_valid_h6(struct drm_connector *connector,
const struct drm_display_mode *mode)
{
- /* This is max for HDMI 2.0b (4K@60Hz) */
- if (mode->clock > 594000)
+ /*
+ * The controller supports a maximum of 594 MHz, which corresponds to
+ * 4K@60Hz 4:4:4 or RGB. However, for frequencies greater than
+ * 340 MHz scrambling has to be enabled. Because scrambling is
+ * not yet implemented, just limit to 340 MHz for now.
+ */
+ if (mode->clock > 340000)
return MODE_CLOCK_HIGH;
return MODE_OK;
err_unregister_gates:
for (i = 0; i < CLK_NUM; i++)
- if (clk_data->hws[i])
+ if (!IS_ERR_OR_NULL(clk_data->hws[i]))
clk_hw_unregister_gate(clk_data->hws[i]);
clk_disable_unprepare(tcon_top->bus);
err_assert_reset:
of_clk_del_provider(dev->of_node);
for (i = 0; i < CLK_NUM; i++)
- clk_hw_unregister_gate(clk_data->hws[i]);
+ if (clk_data->hws[i])
+ clk_hw_unregister_gate(clk_data->hws[i]);
clk_disable_unprepare(tcon_top->bus);
reset_control_assert(tcon_top->rst);
hdmi->dvi = !tegra_output_is_hdmi(output);
if (!hdmi->dvi) {
- err = tegra_hdmi_setup_audio(hdmi);
- if (err < 0)
- hdmi->dvi = true;
+ /*
+ * Make sure that the audio format has been configured before
+ * enabling audio, otherwise we may try to divide by zero.
+ */
+ if (hdmi->format.sample_rate > 0) {
+ err = tegra_hdmi_setup_audio(hdmi);
+ if (err < 0)
+ hdmi->dvi = true;
+ }
}
if (hdmi->config->has_hda)
reservation_object_add_shared_fence(bo->resv, fence);
ret = reservation_object_reserve_shared(bo->resv, 1);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ dma_fence_put(fence);
return ret;
+ }
dma_fence_put(bo->moving);
bo->moving = fence;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (!(flags & TTM_PAGE_FLAG_DMA32)) {
- for (j = 0; j < HPAGE_PMD_NR; ++j)
- if (p++ != pages[i + j])
+ if (!(flags & TTM_PAGE_FLAG_DMA32) &&
+ (npages - i) >= HPAGE_PMD_NR) {
+ for (j = 1; j < HPAGE_PMD_NR; ++j)
+ if (++p != pages[i + j])
break;
if (j == HPAGE_PMD_NR)
unsigned max_size, n2free;
spin_lock_irqsave(&huge->lock, irq_flags);
- while (i < npages) {
+ while ((npages - i) >= HPAGE_PMD_NR) {
struct page *p = pages[i];
unsigned j;
if (!p)
break;
- for (j = 0; j < HPAGE_PMD_NR; ++j)
- if (p++ != pages[i + j])
+ for (j = 1; j < HPAGE_PMD_NR; ++j)
+ if (++p != pages[i + j])
break;
if (j != HPAGE_PMD_NR)
.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
.load = udl_driver_load,
.unload = udl_driver_unload,
+ .release = udl_driver_release,
/* gem hooks */
.gem_free_object_unlocked = udl_gem_free_object,
int udl_driver_load(struct drm_device *dev, unsigned long flags);
void udl_driver_unload(struct drm_device *dev);
+void udl_driver_release(struct drm_device *dev);
int udl_fbdev_init(struct drm_device *dev);
void udl_fbdev_cleanup(struct drm_device *dev);
udl_free_urb_list(dev);
udl_fbdev_cleanup(dev);
- udl_modeset_cleanup(dev);
kfree(udl);
}
+/*
+ * Hooked up as the driver's .release callback: cleans up modesetting,
+ * finalizes the DRM device with drm_dev_fini() and frees @dev itself.
+ */
+void udl_driver_release(struct drm_device *dev)
+{
+ udl_modeset_cleanup(dev);
+ drm_dev_fini(dev);
+ kfree(dev);
+}
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = virtio_gpu_debugfs_init,
#endif
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export = drm_gem_prime_export,
.gem_prime_import = drm_gem_prime_import,
.gem_prime_pin = virtgpu_gem_prime_pin,
.gem_prime_unpin = virtgpu_gem_prime_unpin,
+ .gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table,
+ .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
.gem_prime_vmap = virtgpu_gem_prime_vmap,
.gem_prime_vunmap = virtgpu_gem_prime_vunmap,
.gem_prime_mmap = virtgpu_gem_prime_mmap,
/* virtgpu_prime.c */
int virtgpu_gem_prime_pin(struct drm_gem_object *obj);
void virtgpu_gem_prime_unpin(struct drm_gem_object *obj);
+struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
+ struct drm_device *dev, struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj);
void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int virtgpu_gem_prime_mmap(struct drm_gem_object *obj,
WARN_ONCE(1, "not implemented");
}
+struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+ return ERR_PTR(-ENODEV); /* stub: always fails with -ENODEV, @obj is unused */
+}
+/*
+ * Stub for the .gem_prime_import_sg_table hook: none of the arguments are
+ * used and the call always fails with ERR_PTR(-ENODEV).
+ */
+struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
+ struct drm_device *dev, struct dma_buf_attachment *attach,
+ struct sg_table *table)
+{
+ return ERR_PTR(-ENODEV);
+}
+
void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj);
static void host1x_channel_set_streamid(struct host1x_channel *channel)
{
#if HOST1X_HW >= 6
+ u32 sid = 0x7f;
+#ifdef CONFIG_IOMMU_API
struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent);
- u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f;
+ if (spec)
+ sid = spec->ids[0] & 0xffff;
+#endif
host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
#endif
break;
}
+ if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */
+ switch (usage->hid & 0xf) {
+ case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break;
+ default: goto ignore;
+ }
+ break;
+ }
+
/*
* Some lazy vendors declare 255 usages for System Control,
* leading to the creation of ABS_X|Y axis and too many others.
case 0x06a: map_key_clear(KEY_GREEN); break;
case 0x06b: map_key_clear(KEY_BLUE); break;
case 0x06c: map_key_clear(KEY_YELLOW); break;
- case 0x06d: map_key_clear(KEY_ZOOM); break;
+ case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break;
case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break;
case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break;
case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break;
case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break;
+ case 0x079: map_key_clear(KEY_KBDILLUMUP); break;
+ case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break;
+ case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break;
+
case 0x082: map_key_clear(KEY_VIDEO_NEXT); break;
case 0x083: map_key_clear(KEY_LAST); break;
case 0x084: map_key_clear(KEY_ENTER); break;
case 0x22d: map_key_clear(KEY_ZOOMIN); break;
case 0x22e: map_key_clear(KEY_ZOOMOUT); break;
case 0x22f: map_key_clear(KEY_ZOOMRESET); break;
+ case 0x232: map_key_clear(KEY_FULL_SCREEN); break;
case 0x233: map_key_clear(KEY_SCROLLUP); break;
case 0x234: map_key_clear(KEY_SCROLLDOWN); break;
case 0x238: /* AC Pan */
case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break;
case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break;
+ case 0x29f: map_key_clear(KEY_SCALE); break;
+
default: map_key_clear(KEY_UNKNOWN);
}
break;
/* Init DMA config if supported */
ret = i2c_imx_dma_request(i2c_imx, phy_addr);
if (ret < 0)
- goto clk_notifier_unregister;
+ goto del_adapter;
dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n");
return 0; /* Return OK */
+del_adapter:
+ i2c_del_adapter(&i2c_imx->adapter);
clk_notifier_unregister:
clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
rpm_disable:
{
struct i3c_dev_boardinfo *boardinfo;
struct device *dev = &master->dev;
- struct i3c_device_info info = { };
enum i3c_addr_slot_status addrstatus;
u32 init_dyn_addr = 0;
boardinfo->pid = ((u64)reg[1] << 32) | reg[2];
- if ((info.pid & GENMASK_ULL(63, 48)) ||
- I3C_PID_RND_LOWER_32BITS(info.pid))
+ if ((boardinfo->pid & GENMASK_ULL(63, 48)) ||
+ I3C_PID_RND_LOWER_32BITS(boardinfo->pid))
return -EINVAL;
boardinfo->init_dyn_addr = init_dyn_addr;
static void dw_i3c_master_disable(struct dw_i3c_master *master)
{
- writel(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_ENABLE,
+ writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_ENABLE,
master->regs + DEVICE_CTRL);
}
mutex_lock(&data->mutex);
ret = kxcjk1013_set_mode(data, OPERATION);
+ if (ret == 0)
+ ret = kxcjk1013_set_range(data, data->range);
mutex_unlock(&data->mutex);
return ret;
if (sigma_delta->info->has_registers) {
data[0] = reg << sigma_delta->info->addr_shift;
data[0] |= sigma_delta->info->read_mask;
+ data[0] |= sigma_delta->comm;
spi_message_add_tail(&t[0], &m);
}
spi_message_add_tail(&t[1], &m);
ret = wait_event_interruptible_timeout(st->wq_data_avail,
st->done,
msecs_to_jiffies(1000));
- if (ret == 0)
- ret = -ETIMEDOUT;
- if (ret < 0) {
- mutex_unlock(&st->lock);
- return ret;
- }
-
- *val = st->last_value;
+ /* Disable interrupts, regardless if adc conversion was
+ * successful or not
+ */
at91_adc_writel(st, AT91_ADC_CHDR,
AT91_ADC_CH(chan->channel));
at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel));
- st->last_value = 0;
- st->done = false;
+ if (ret > 0) {
+ /* a valid conversion took place */
+ *val = st->last_value;
+ st->last_value = 0;
+ st->done = false;
+ ret = IIO_VAL_INT;
+ } else if (ret == 0) {
+ /* conversion timeout */
+ dev_err(&idev->dev, "ADC Channel %d timeout.\n",
+ chan->channel);
+ ret = -ETIMEDOUT;
+ }
+
mutex_unlock(&st->lock);
- return IIO_VAL_INT;
+ return ret;
case IIO_CHAN_INFO_SCALE:
*val = st->vref_mv;
err_free_irq:
free_irq(xadc->irq, indio_dev);
+ cancel_delayed_work_sync(&xadc->zynq_unmask_work);
err_clk_disable_unprepare:
clk_disable_unprepare(xadc->clk);
err_free_samplerate_trigger:
iio_triggered_buffer_cleanup(indio_dev);
}
free_irq(xadc->irq, indio_dev);
+ cancel_delayed_work_sync(&xadc->zynq_unmask_work);
clk_disable_unprepare(xadc->clk);
- cancel_delayed_work(&xadc->zynq_unmask_work);
kfree(xadc->data);
kfree(indio_dev->channels);
config PMS7003
tristate "Plantower PMS7003 particulate matter sensor"
depends on SERIAL_DEV_BUS
+ select IIO_TRIGGERED_BUFFER
help
Say Y here to build support for the Plantower PMS7003 particulate
matter sensor.
To compile this driver as a module, choose M here: the module will
be called pms7003.
+config SENSIRION_SGP30
+ tristate "Sensirion SGPxx gas sensors"
+ depends on I2C
+ select CRC8
+ help
+ Say Y here to build I2C interface support for the following
+ Sensirion SGP gas sensors:
+ * SGP30 gas sensor
+ * SGPC3 low power gas sensor
+
+ To compile this driver as module, choose M here: the
+ module will be called sgp30.
+
config SPS30
tristate "SPS30 particulate matter sensor"
depends on I2C
#ifndef BME680_H_
#define BME680_H_
-#define BME680_REG_CHIP_I2C_ID 0xD0
-#define BME680_REG_CHIP_SPI_ID 0x50
+#define BME680_REG_CHIP_ID 0xD0
#define BME680_CHIP_ID_VAL 0x61
-#define BME680_REG_SOFT_RESET_I2C 0xE0
-#define BME680_REG_SOFT_RESET_SPI 0x60
+#define BME680_REG_SOFT_RESET 0xE0
#define BME680_CMD_SOFTRESET 0xB6
#define BME680_REG_STATUS 0x73
#define BME680_SPI_MEM_PAGE_BIT BIT(4)
s32 t_fine;
};
+static const struct regmap_range bme680_volatile_ranges[] = {
+ regmap_reg_range(BME680_REG_MEAS_STAT_0, BME680_REG_GAS_R_LSB),
+ regmap_reg_range(BME680_REG_STATUS, BME680_REG_STATUS),
+ regmap_reg_range(BME680_T2_LSB_REG, BME680_GH3_REG),
+};
+/*
+ * Access table listing the ranges above as "yes" ranges; it is installed as
+ * the .volatile_table of bme680_regmap_config.
+ */
+static const struct regmap_access_table bme680_volatile_table = {
+ .yes_ranges = bme680_volatile_ranges,
+ .n_yes_ranges = ARRAY_SIZE(bme680_volatile_ranges),
+};
+
const struct regmap_config bme680_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
+ .max_register = 0xef,
+ .volatile_table = &bme680_volatile_table,
+ .cache_type = REGCACHE_RBTREE,
};
EXPORT_SYMBOL(bme680_regmap_config);
s64 var1, var2, var3;
s16 calc_temp;
+ /* If the calibration is invalid, attempt to reload it */
+ if (!calib->par_t2)
+ bme680_read_calib(data, calib);
+
var1 = (adc_temp >> 3) - (calib->par_t1 << 1);
var2 = (var1 * calib->par_t2) >> 11;
var3 = ((var1 >> 1) * (var1 >> 1)) >> 12;
return ret;
}
-static int bme680_read_temp(struct bme680_data *data,
- int *val, int *val2)
+static int bme680_read_temp(struct bme680_data *data, int *val)
{
struct device *dev = regmap_get_device(data->regmap);
int ret;
* compensate_press/compensate_humid to get compensated
* pressure/humidity readings.
*/
- if (val && val2) {
- *val = comp_temp;
- *val2 = 100;
- return IIO_VAL_FRACTIONAL;
+ if (val) {
+ *val = comp_temp * 10; /* Centidegrees to millidegrees */
+ return IIO_VAL_INT;
}
return ret;
s32 adc_press;
/* Read and compensate temperature to get a reading of t_fine */
- ret = bme680_read_temp(data, NULL, NULL);
+ ret = bme680_read_temp(data, NULL);
if (ret < 0)
return ret;
u32 comp_humidity;
/* Read and compensate temperature to get a reading of t_fine */
- ret = bme680_read_temp(data, NULL, NULL);
+ ret = bme680_read_temp(data, NULL);
if (ret < 0)
return ret;
case IIO_CHAN_INFO_PROCESSED:
switch (chan->type) {
case IIO_TEMP:
- return bme680_read_temp(data, val, val2);
+ return bme680_read_temp(data, val);
case IIO_PRESSURE:
return bme680_read_press(data, val, val2);
case IIO_HUMIDITYRELATIVE:
{
struct iio_dev *indio_dev;
struct bme680_data *data;
+ unsigned int val;
int ret;
+ ret = regmap_write(regmap, BME680_REG_SOFT_RESET,
+ BME680_CMD_SOFTRESET);
+ if (ret < 0) {
+ dev_err(dev, "Failed to reset chip\n");
+ return ret;
+ }
+
+ ret = regmap_read(regmap, BME680_REG_CHIP_ID, &val);
+ if (ret < 0) {
+ dev_err(dev, "Error reading chip ID\n");
+ return ret;
+ }
+
+ if (val != BME680_CHIP_ID_VAL) {
+ dev_err(dev, "Wrong chip ID, got %x expected %x\n",
+ val, BME680_CHIP_ID_VAL);
+ return -ENODEV;
+ }
+
indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
if (!indio_dev)
return -ENOMEM;
{
struct regmap *regmap;
const char *name = NULL;
- unsigned int val;
- int ret;
regmap = devm_regmap_init_i2c(client, &bme680_regmap_config);
if (IS_ERR(regmap)) {
return PTR_ERR(regmap);
}
- ret = regmap_write(regmap, BME680_REG_SOFT_RESET_I2C,
- BME680_CMD_SOFTRESET);
- if (ret < 0) {
- dev_err(&client->dev, "Failed to reset chip\n");
- return ret;
- }
-
- ret = regmap_read(regmap, BME680_REG_CHIP_I2C_ID, &val);
- if (ret < 0) {
- dev_err(&client->dev, "Error reading I2C chip ID\n");
- return ret;
- }
-
- if (val != BME680_CHIP_ID_VAL) {
- dev_err(&client->dev, "Wrong chip ID, got %x expected %x\n",
- val, BME680_CHIP_ID_VAL);
- return -ENODEV;
- }
-
if (id)
name = id->name;
#include "bme680.h"
+struct bme680_spi_bus_context {
+ struct spi_device *spi; /* SPI device the regmap bus callbacks transfer on */
+ u8 current_page; /* page last selected; probe sets 0xff for "unknown" */
+};
+
+/*
+ * In SPI mode there are only 7 address bits, a "page" register determines
+ * which part of the 8-bit range is active. This function looks at the address
+ * and writes the page selection bit if needed.
+ *
+ * @ctx: bus context holding the SPI device and the last page selected
+ * @reg: full 8-bit register address about to be accessed
+ *
+ * Addresses with bit 7 set need page 0, all others page 1. Nothing is sent
+ * when ctx->current_page already matches; otherwise the page bit in
+ * BME680_REG_STATUS is updated by read-modify-write and the new page is
+ * cached in ctx->current_page.
+ *
+ * Returns 0 on success or the negative error from the SPI transfer.
+ *
+ * NOTE(review): the status read below sends BME680_REG_STATUS unmodified,
+ * while bme680_regmap_spi_read() sets bit 7 on the address for reads -
+ * confirm that no read bit is required here.
+ */
+static int bme680_regmap_spi_select_page(
+ struct bme680_spi_bus_context *ctx, u8 reg)
+{
+ struct spi_device *spi = ctx->spi;
+ int ret;
+ u8 buf[2];
+ u8 page = (reg & 0x80) ? 0 : 1; /* Page "1" is low range */
+
+ if (page == ctx->current_page)
+ return 0;
+
+ /*
+ * Data sheet claims we're only allowed to change bit 4, so we must do
+ * a read-modify-write on each and every page select
+ */
+ buf[0] = BME680_REG_STATUS;
+ ret = spi_write_then_read(spi, buf, 1, buf + 1, 1);
+ if (ret < 0) {
+ dev_err(&spi->dev, "failed to set page %u\n", page);
+ return ret;
+ }
+
+ buf[0] = BME680_REG_STATUS;
+ if (page)
+ buf[1] |= BME680_SPI_MEM_PAGE_BIT;
+ else
+ buf[1] &= ~BME680_SPI_MEM_PAGE_BIT;
+
+ ret = spi_write(spi, buf, 2);
+ if (ret < 0) {
+ dev_err(&spi->dev, "failed to set page %u\n", page);
+ return ret;
+ }
+
+ ctx->current_page = page;
+
+ return 0;
+}
+
static int bme680_regmap_spi_write(void *context, const void *data,
size_t count)
{
- struct spi_device *spi = context;
+ struct bme680_spi_bus_context *ctx = context;
+ struct spi_device *spi = ctx->spi;
+ int ret;
u8 buf[2];
memcpy(buf, data, 2);
+
+ ret = bme680_regmap_spi_select_page(ctx, buf[0]);
+ if (ret)
+ return ret;
+
/*
* The SPI register address (= full register address without bit 7)
* and the write command (bit7 = RW = '0')
*/
buf[0] &= ~0x80;
- return spi_write_then_read(spi, buf, 2, NULL, 0);
+ return spi_write(spi, buf, 2);
}
static int bme680_regmap_spi_read(void *context, const void *reg,
size_t reg_size, void *val, size_t val_size)
{
- struct spi_device *spi = context;
+ struct bme680_spi_bus_context *ctx = context;
+ struct spi_device *spi = ctx->spi;
+ int ret;
+ u8 addr = *(const u8 *)reg;
+
+ ret = bme680_regmap_spi_select_page(ctx, addr);
+ if (ret)
+ return ret;
- return spi_write_then_read(spi, reg, reg_size, val, val_size);
+ addr |= 0x80; /* bit7 = RW = '1' */
+
+ return spi_write_then_read(spi, &addr, 1, val, val_size);
}
static struct regmap_bus bme680_regmap_bus = {
static int bme680_spi_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
+ struct bme680_spi_bus_context *bus_context;
struct regmap *regmap;
- unsigned int val;
int ret;
spi->bits_per_word = 8;
return ret;
}
+ bus_context = devm_kzalloc(&spi->dev, sizeof(*bus_context), GFP_KERNEL);
+ if (!bus_context)
+ return -ENOMEM;
+
+ bus_context->spi = spi;
+ bus_context->current_page = 0xff; /* Undefined on warm boot */
+
regmap = devm_regmap_init(&spi->dev, &bme680_regmap_bus,
- &spi->dev, &bme680_regmap_config);
+ bus_context, &bme680_regmap_config);
if (IS_ERR(regmap)) {
dev_err(&spi->dev, "Failed to register spi regmap %d\n",
(int)PTR_ERR(regmap));
return PTR_ERR(regmap);
}
- ret = regmap_write(regmap, BME680_REG_SOFT_RESET_SPI,
- BME680_CMD_SOFTRESET);
- if (ret < 0) {
- dev_err(&spi->dev, "Failed to reset chip\n");
- return ret;
- }
-
- /* after power-on reset, Page 0(0x80-0xFF) of spi_mem_page is active */
- ret = regmap_read(regmap, BME680_REG_CHIP_SPI_ID, &val);
- if (ret < 0) {
- dev_err(&spi->dev, "Error reading SPI chip ID\n");
- return ret;
- }
-
- if (val != BME680_CHIP_ID_VAL) {
- dev_err(&spi->dev, "Wrong chip ID, got %x expected %x\n",
- val, BME680_CHIP_ID_VAL);
- return -ENODEV;
- }
- /*
- * select Page 1 of spi_mem_page to enable access to
- * to registers from address 0x00 to 0x7F.
- */
- ret = regmap_write_bits(regmap, BME680_REG_STATUS,
- BME680_SPI_MEM_PAGE_BIT,
- BME680_SPI_MEM_PAGE_1_VAL);
- if (ret < 0) {
- dev_err(&spi->dev, "failed to set page 1 of spi_mem_page\n");
- return ret;
- }
-
return bme680_core_probe(&spi->dev, regmap, id->name);
}
* Do not use IIO_DEGREE_TO_RAD to avoid precision
* loss. Round to the nearest integer.
*/
- *val = div_s64(val64 * 314159 + 9000000ULL, 1000);
- *val2 = 18000 << (CROS_EC_SENSOR_BITS - 1);
- ret = IIO_VAL_FRACTIONAL;
+ *val = 0;
+ *val2 = div_s64(val64 * 3141592653ULL,
+ 180 << (CROS_EC_SENSOR_BITS - 1));
+ ret = IIO_VAL_INT_PLUS_NANO;
break;
case MOTIONSENSE_TYPE_MAG:
/*
inoutbuf[0] = 0x60; /* write EEPROM */
inoutbuf[0] |= data->ref_mode << 3;
+ inoutbuf[0] |= data->powerdown ? ((data->powerdown_mode + 1) << 1) : 0;
inoutbuf[1] = data->dac_value >> 4;
inoutbuf[2] = (data->dac_value & 0xf) << 4;
case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY:
return bmg160_get_filter(data, val);
case IIO_CHAN_INFO_SCALE:
- *val = 0;
switch (chan->type) {
case IIO_TEMP:
- *val2 = 500000;
- return IIO_VAL_INT_PLUS_MICRO;
+ *val = 500;
+ return IIO_VAL_INT;
case IIO_ANGL_VEL:
{
int i;
for (i = 0; i < ARRAY_SIZE(bmg160_scale_table); ++i) {
if (bmg160_scale_table[i].dps_range ==
data->dps_range) {
+ *val = 0;
*val2 = bmg160_scale_table[i].scale;
return IIO_VAL_INT_PLUS_MICRO;
}
#include "mpu3050.h"
-#define MPU3050_CHIP_ID 0x69
+#define MPU3050_CHIP_ID 0x68
+#define MPU3050_CHIP_ID_MASK 0x7E
/*
* Register map: anything suffixed *_H is a big-endian high byte and always
goto err_power_down;
}
- if (val != MPU3050_CHIP_ID) {
- dev_err(dev, "unsupported chip id %02x\n", (u8)val);
+ if ((val & MPU3050_CHIP_ID_MASK) != MPU3050_CHIP_ID) {
+ dev_err(dev, "unsupported chip id %02x\n",
+ (u8)(val & MPU3050_CHIP_ID_MASK));
ret = -ENODEV;
goto err_power_down;
}
const unsigned long *mask;
unsigned long *trialmask;
- trialmask = kmalloc_array(BITS_TO_LONGS(indio_dev->masklength),
- sizeof(*trialmask),
- GFP_KERNEL);
+ trialmask = kcalloc(BITS_TO_LONGS(indio_dev->masklength),
+ sizeof(*trialmask), GFP_KERNEL);
if (trialmask == NULL)
return -ENOMEM;
if (!indio_dev->masklength) {
**/
void iio_device_unregister(struct iio_dev *indio_dev)
{
- mutex_lock(&indio_dev->info_exist_lock);
-
cdev_device_del(&indio_dev->chrdev, &indio_dev->dev);
+ mutex_lock(&indio_dev->info_exist_lock);
+
iio_device_unregister_debugfs(indio_dev);
iio_disable_all_buffers(indio_dev);
if (family == AF_INET) {
rt = container_of(dst, struct rtable, dst);
- return rt->rt_uses_gateway;
+ return rt->rt_gw_family == AF_INET;
}
rt6 = container_of(dst, struct rt6_info, dst);
* will only be one mm, so no big deal.
*/
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
}
mutex_unlock(&ufile->umap_lock);
+ skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
}
int total_contexts;
int ret;
unsigned ngroups;
- int qos_rmt_count;
+ int rmt_count;
int user_rmt_reduced;
u32 n_usr_ctxts;
u32 send_contexts = chip_send_contexts(dd);
n_usr_ctxts = rcv_contexts - total_contexts;
}
- /* each user context requires an entry in the RMT */
- qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
- if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
- user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
+ /*
+ * The RMT entries are currently allocated as shown below:
+ * 1. QOS (0 to 128 entries);
+ * 2. FECN for PSM (num_user_contexts + num_vnic_contexts);
+ * 3. VNIC (num_vnic_contexts).
+ * It should be noted that PSM FECN oversubscribes num_vnic_contexts
+ * entries of RMT because both VNIC and PSM could allocate any receive
+ * context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts,
+ * and PSM FECN must reserve an RMT entry for each possible PSM receive
+ * context.
+ */
+ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+ if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
+ user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
dd_dev_err(dd,
"RMT size is reducing the number of user receive contexts from %u to %d\n",
n_usr_ctxts,
u64 reg;
int i, idx, regoff, regidx;
u8 offset;
+ u32 total_cnt;
/* there needs to be enough room in the map table */
- if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
+ total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+ if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
return;
}
/* add rule 1 */
add_rsm_rule(dd, RSM_INS_FECN, &rrd);
- rmt->used += dd->num_user_contexts;
+ rmt->used += total_cnt;
}
/* Initialize RSM for VNIC */
if (!list_empty(&priv->s_iowait.list) &&
!(qp->s_flags & RVT_S_BUSY) &&
!(priv->s_flags & RVT_S_BUSY)) {
- qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+ qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
- if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+ if (e->rdma_sge.mr) {
rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
- if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+ if (e->rdma_sge.mr) {
rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
make_tid_rdma_ack(qp, ohdr, ps))
return 1;
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
- if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
- goto bail;
- /* We are in the error state, flush the work request. */
- if (qp->s_last == READ_ONCE(qp->s_head))
- goto bail;
- /* If DMAs are in progress, we can't flush immediately. */
- if (iowait_sdma_pending(&priv->s_iowait)) {
- qp->s_flags |= RVT_S_WAIT_DMA;
- goto bail;
- }
- clear_ahg(qp);
- wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
- IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
- /* will get called again */
- goto done_free_tx;
- }
+ /*
+ * Bail out if we can't send data.
+ * Be reminded that this check must be done after the call to
+ * make_tid_rdma_ack() because the responding QP could be in
+ * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA.
+ */
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
+ goto bail;
if (priv->s_flags & RVT_S_WAIT_ACK)
goto bail;
hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
middle, ps);
return 1;
-done_free_tx:
- hfi1_put_txreq(ps->s_txreq);
- ps->s_txreq = NULL;
- return 1;
-
bail:
hfi1_put_txreq(ps->s_txreq);
bail_no_tx:
idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk;
dma_offset = offset = idx_offset * table->obj_size;
} else {
+ u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
+
hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
/* mtt mhop */
i = mhop.l0_idx;
hem_idx = i;
hem = table->hem[hem_idx];
- dma_offset = offset = (obj & (table->num_obj - 1)) *
- table->obj_size % mhop.bt_chunk_size;
+ dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size %
+ mhop.bt_chunk_size;
if (mhop.hop_num == 2)
dma_offset = offset = 0;
}
struct hns_roce_hem_table *table;
dma_addr_t dma_handle;
__le64 *mtts;
- u32 s = start_index * sizeof(u64);
u32 bt_page_size;
u32 i;
return -EINVAL;
mtts = hns_roce_table_find(hr_dev, table,
- mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
+ mtt->first_seg +
+ start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
&dma_handle);
if (!mtts)
return -ENOMEM;
wait_for_completion(&hr_qp->free);
if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
- if (hr_dev->caps.sccc_entry_sz)
- hns_roce_table_put(hr_dev, &qp_table->sccc_table,
- hr_qp->qpn);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->trrl_table,
hr_qp->qpn);
return ret;
}
- *addr = pci_resource_start(dev->pdev, 0) +
+ *addr = dev->bar_addr +
MLX5_GET64(alloc_memic_out, out, memic_start_addr);
return 0;
u64 start_page_idx;
int err;
- addr -= pci_resource_start(dev->pdev, 0);
+ addr -= dev->bar_addr;
start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
+ return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
static int get_command(unsigned long offset)
page_idx + npages)
return -EINVAL;
- pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+ pfn = ((dev->mdev->bar_addr +
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
goto err_free;
start_offset = memic_addr & ~PAGE_MASK;
- page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
+ page_idx = (memic_addr - memic->dev->bar_addr -
MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
if (ret)
return ret;
- page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
+ page_idx = (dm->dev_addr - memic->dev->bar_addr -
MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
MLX5_SET64(mkc, mkc, len, length);
MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr,
- memic_addr - pci_resource_start(dev->mdev->pdev, 0));
+ MLX5_SET64(mkc, mkc, start_addr, memic_addr - dev->mdev->bar_addr);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
- u64 access_mask = ODP_READ_ALLOWED_BIT;
+ u64 access_mask;
u64 start_idx, page_mask;
struct ib_umem_odp *odp;
size_t size;
page_shift = mr->umem->page_shift;
page_mask = ~(BIT(page_shift) - 1);
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+ access_mask = ODP_READ_ALLOWED_BIT;
if (prefetch && !downgrade && !mr->umem->writable) {
/* prefetch with write-access must
wmb();
/* currently we support only regular doorbells */
- mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, NULL);
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
if (neigh->nud_state & NUD_VALID) {
nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
" is %pM, Gateway is 0x%08X \n", dst_ip,
- neigh->ha, ntohl(rt->rt_gateway));
+ neigh->ha, ntohl(rt->rt_gw4));
if (arpindex >= 0) {
if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
pvrdma_free_slots(dev);
+ dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
+ dev->dsrbase);
iounmap(dev->regs);
kfree(dev->sgid_tbl);
return error;
}
+ pdata->input = input;
+ platform_set_drvdata(pdev, pdata);
+
error = devm_request_irq(&pdev->dev, pdata->irq,
imx_snvs_pwrkey_interrupt,
0, pdev->name, pdev);
return error;
}
- pdata->input = input;
- platform_set_drvdata(pdev, pdata);
-
device_init_wakeup(&pdev->dev, pdata->wakeup);
return 0;
{ "ELAN0600", 0 },
{ "ELAN0601", 0 },
{ "ELAN0602", 0 },
+ { "ELAN0603", 0 },
+ { "ELAN0604", 0 },
{ "ELAN0605", 0 },
+ { "ELAN0606", 0 },
+ { "ELAN0607", 0 },
{ "ELAN0608", 0 },
{ "ELAN0609", 0 },
{ "ELAN060B", 0 },
{ "ELAN060C", 0 },
+ { "ELAN060F", 0 },
+ { "ELAN0610", 0 },
{ "ELAN0611", 0 },
{ "ELAN0612", 0 },
+ { "ELAN0615", 0 },
+ { "ELAN0616", 0 },
{ "ELAN0617", 0 },
{ "ELAN0618", 0 },
+ { "ELAN0619", 0 },
+ { "ELAN061A", 0 },
+ { "ELAN061B", 0 },
{ "ELAN061C", 0 },
{ "ELAN061D", 0 },
{ "ELAN061E", 0 },
+ { "ELAN061F", 0 },
{ "ELAN0620", 0 },
{ "ELAN0621", 0 },
{ "ELAN0622", 0 },
+ { "ELAN0623", 0 },
+ { "ELAN0624", 0 },
+ { "ELAN0625", 0 },
+ { "ELAN0626", 0 },
+ { "ELAN0627", 0 },
+ { "ELAN0628", 0 },
+ { "ELAN0629", 0 },
+ { "ELAN062A", 0 },
+ { "ELAN062B", 0 },
+ { "ELAN062C", 0 },
+ { "ELAN062D", 0 },
+ { "ELAN0631", 0 },
+ { "ELAN0632", 0 },
{ "ELAN1000", 0 },
{ }
};
static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
u64 start = iommu->exclusion_start & PAGE_MASK;
- u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+ u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
if (!iommu->exclusion_start)
#define AR71XX_RESET_REG_MISC_INT_ENABLE 4
#define ATH79_MISC_IRQ_COUNT 32
+#define ATH79_MISC_PERF_IRQ 5
+
+static int ath79_perfcount_irq;
+
+int get_c0_perfcount_int(void)
+{
+ return ath79_perfcount_irq;
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
static void ath79_misc_irq_handler(struct irq_desc *desc)
{
{
void __iomem *base = domain->host_data;
+ ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ);
+
/* Disable and clear all interrupts */
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
NULL);
if (!priv->domain) {
pr_err("ls1x-irq: cannot add IRQ domain\n");
+ err = -ENOMEM;
goto out_iounmap;
}
}
printk(KERN_INFO
- "HFC-PCI: defined at mem %#lx fifo %#lx(%#lx) IRQ %d HZ %d\n",
- (u_long) hc->hw.pci_io, (u_long) hc->hw.fifos,
- (u_long) hc->hw.dmahandle, hc->irq, HZ);
+ "HFC-PCI: defined at mem %#lx fifo %p(%pad) IRQ %d HZ %d\n",
+ (u_long) hc->hw.pci_io, hc->hw.fifos,
+ &hc->hw.dmahandle, hc->irq, HZ);
/* enable memory mapped ports, disable busmaster */
pci_write_config_word(hc->pdev, PCI_COMMAND, PCI_ENA_MEMIO);
printk(KERN_DEBUG "HiSax: reportcard No %d\n", cardnr + 1);
printk(KERN_DEBUG "HiSax: Type %s\n", CardType[cs->typ]);
printk(KERN_DEBUG "HiSax: debuglevel %x\n", cs->debug);
- printk(KERN_DEBUG "HiSax: HiSax_reportcard address 0x%lX\n",
- (ulong) & HiSax_reportcard);
- printk(KERN_DEBUG "HiSax: cs 0x%lX\n", (ulong) cs);
+ printk(KERN_DEBUG "HiSax: HiSax_reportcard address 0x%px\n",
+ HiSax_reportcard);
+ printk(KERN_DEBUG "HiSax: cs 0x%px\n", cs);
printk(KERN_DEBUG "HiSax: HW_Flags %lx bc0 flg %lx bc1 flg %lx\n",
cs->HW_Flags, cs->bcs[0].Flag, cs->bcs[1].Flag);
printk(KERN_DEBUG "HiSax: bcs 0 mode %d ch%d\n",
struct sock *sk = sock->sk;
int err = 0;
- if (!maddr || maddr->family != AF_ISDN)
+ if (addr_len < sizeof(struct sockaddr_mISDN))
return -EINVAL;
- if (addr_len < sizeof(struct sockaddr_mISDN))
+ if (!maddr || maddr->family != AF_ISDN)
return -EINVAL;
lock_sock(sk);
struct pblk_sec_meta *meta;
struct bio *new_bio = rqd->bio;
struct bio *bio = pr_ctx->orig_bio;
- struct bio_vec src_bv, dst_bv;
void *meta_list = rqd->meta_list;
- int bio_init_idx = pr_ctx->bio_init_idx;
unsigned long *read_bitmap = pr_ctx->bitmap;
+ struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
+ struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
int nr_secs = pr_ctx->orig_nr_secs;
int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
void *src_p, *dst_p;
- int hole, i;
+ int bit, i;
if (unlikely(nr_holes == 1)) {
struct ppa_addr ppa;
/* Fill the holes in the original bio */
i = 0;
- hole = find_first_zero_bit(read_bitmap, nr_secs);
- do {
- struct pblk_line *line;
+ for (bit = 0; bit < nr_secs; bit++) {
+ if (!test_bit(bit, read_bitmap)) {
+ struct bio_vec dst_bv, src_bv;
+ struct pblk_line *line;
- line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
- kref_put(&line->ref, pblk_line_put);
+ line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
+ kref_put(&line->ref, pblk_line_put);
- meta = pblk_get_meta(pblk, meta_list, hole);
- meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
+ meta = pblk_get_meta(pblk, meta_list, bit);
+ meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
- src_bv = new_bio->bi_io_vec[i++];
- dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+ dst_bv = bio_iter_iovec(bio, orig_iter);
+ src_bv = bio_iter_iovec(new_bio, new_iter);
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
+ src_p = kmap_atomic(src_bv.bv_page);
+ dst_p = kmap_atomic(dst_bv.bv_page);
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- PBLK_EXPOSED_PAGE_SIZE);
+ memcpy(dst_p + dst_bv.bv_offset,
+ src_p + src_bv.bv_offset,
+ PBLK_EXPOSED_PAGE_SIZE);
- kunmap_atomic(src_p);
- kunmap_atomic(dst_p);
+ kunmap_atomic(src_p);
+ kunmap_atomic(dst_p);
- mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
+ flush_dcache_page(dst_bv.bv_page);
+ mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
- hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
- } while (hole < nr_secs);
+ bio_advance_iter(new_bio, &new_iter,
+ PBLK_EXPOSED_PAGE_SIZE);
+ i++;
+ }
+ bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
+ }
bio_put(new_bio);
kfree(pr_ctx);
struct srcu_struct io_barrier;
};
+void disable_discard(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
void disable_write_zeroes(struct mapped_device *md);
struct list_head list;
};
-const char *dm_allowed_targets[] __initconst = {
+const char * const dm_allowed_targets[] __initconst = {
"crypt",
"delay",
"linear",
static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
{
return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
- range2->logical_sector + range2->n_sectors > range2->logical_sector;
+ range1->logical_sector + range1->n_sectors > range2->logical_sector;
}
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
struct dm_integrity_range *last_range =
list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
struct task_struct *last_range_task;
- if (!ranges_overlap(range, last_range))
- break;
last_range_task = last_range->task;
list_del(&last_range->wait_entry);
if (!add_new_range(ic, last_range, false)) {
journal_watermark = val;
else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
sync_msec = val;
- else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) {
+ else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
if (ic->meta_dev) {
dm_put_device(ti, ic->meta_dev);
ic->meta_dev = NULL;
goto bad;
}
ic->sectors_per_block = val >> SECTOR_SHIFT;
- } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
+ } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument");
if (r)
goto bad;
- } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
+ } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
"Invalid journal_crypt argument");
if (r)
goto bad;
- } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
+ } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
"Invalid journal_mac argument");
if (r)
.io_hints = dm_integrity_io_hints,
};
-int __init dm_integrity_init(void)
+static int __init dm_integrity_init(void)
{
int r;
return r;
}
-void dm_integrity_exit(void)
+static void __exit dm_integrity_exit(void)
{
dm_unregister_target(&integrity_target);
kmem_cache_destroy(journal_io_cache);
}
if (unlikely(error == BLK_STS_TARGET)) {
- if (req_op(clone) == REQ_OP_WRITE_SAME &&
- !clone->q->limits.max_write_same_sectors)
+ if (req_op(clone) == REQ_OP_DISCARD &&
+ !clone->q->limits.max_discard_sectors)
+ disable_discard(tio->md);
+ else if (req_op(clone) == REQ_OP_WRITE_SAME &&
+ !clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
- if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
- !clone->q->limits.max_write_zeroes_sectors)
+ else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
+ !clone->q->limits.max_write_zeroes_sectors)
disable_write_zeroes(tio->md);
}
return true;
}
+static int device_requires_stable_pages(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && bdi_cap_stable_pages_required(q->backing_dev_info);
+}
+
+/*
+ * If any underlying device requires stable pages, a table must require
+ * them as well. Only targets that support iterate_devices are considered:
+ * don't want error, zero, etc to require stable pages.
+ */
+static bool dm_table_requires_stable_pages(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i;
+
+ for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ ti = dm_table_get_target(t, i);
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, device_requires_stable_pages, NULL))
+ return true;
+ }
+
+ return false;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
dm_table_verify_integrity(t);
+ /*
+ * Some devices don't use blk_integrity but still want stable pages
+ * because they do their own checksumming.
+ */
+ if (dm_table_requires_stable_pages(t))
+ q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
+ else
+ q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
+
/*
* Determine whether or not this queue's I/O timings contribute
* to the entropy pool, Only request-based targets use this.
}
}
+void disable_discard(struct mapped_device *md)
+{
+ struct queue_limits *limits = dm_get_queue_limits(md);
+
+ /* device doesn't really support DISCARD, disable it */
+ limits->max_discard_sectors = 0;
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
+}
+
void disable_write_same(struct mapped_device *md)
{
struct queue_limits *limits = dm_get_queue_limits(md);
dm_endio_fn endio = tio->ti->type->end_io;
if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
- if (bio_op(bio) == REQ_OP_WRITE_SAME &&
- !bio->bi_disk->queue->limits.max_write_same_sectors)
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !bio->bi_disk->queue->limits.max_discard_sectors)
+ disable_discard(md);
+ else if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+ !bio->bi_disk->queue->limits.max_write_same_sectors)
disable_write_same(md);
- if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
disable_write_zeroes(md);
}
return -EINVAL;
}
- /*
- * BIO based queue uses its own splitting. When multipage bvecs
- * is switched on, size of the incoming bio may be too big to
- * be handled in some targets, such as crypt.
- *
- * When these targets are ready for the big bio, we can remove
- * the limit.
- */
- ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE);
+ ti->max_io_len = (uint32_t) len;
return 0;
}
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
struct fastrpc_session_ctx *sess;
struct device *dev = &pdev->dev;
int i, sessions = 0;
+ int rc;
cctx = dev_get_drvdata(dev->parent);
if (!cctx)
}
cctx->sesscount++;
spin_unlock(&cctx->lock);
- dma_set_mask(dev, DMA_BIT_MASK(32));
+ rc = dma_set_mask(dev, DMA_BIT_MASK(32));
+ if (rc) {
+ dev_err(dev, "32-bit DMA enable failed\n");
+ return rc;
+ }
return 0;
}
/*
* Workaround for H2 #HW-23 bug
- * Set DMA max outstanding read requests to 240 on DMA CH 1. Set it
- * to 16 on KMD DMA
- * We need to limit only these DMAs because the user can only read
+ * Set DMA max outstanding read requests to 240 on DMA CH 1.
+ * This limitation is still large enough to not affect Gen4 bandwidth.
+ * We need to only limit that DMA channel because the user can only read
* from Host using DMA CH 1
*/
- WREG32(mmDMA_CH_0_CFG0, 0x0fff0010);
WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
goya->hw_cap_initialized |= HW_CAP_GOLDEN;
* WA for HW-23.
* We can't allow user to read from Host using QMANs other than 1.
*/
- if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
+ if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
le32_to_cpu(user_dma_pkt->tsize),
hdev->asic_prop.va_space_host_start_address,
struct mmc_command *cmd;
struct mmc_data *data;
unsigned int dma_on:1;
- unsigned int early_data:1;
struct mutex cmd_mutex;
host->sg_count--;
}
-static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host,
- bool early)
+static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host)
{
struct alcor_pci_priv *priv = host->alcor_pci;
struct mmc_data *data = host->data;
ctrl |= AU6601_DATA_WRITE;
if (data->host_cookie == COOKIE_MAPPED) {
- if (host->early_data) {
- host->early_data = false;
- return;
- }
-
- host->early_data = early;
-
alcor_data_set_dma(host);
ctrl |= AU6601_DATA_DMA_MODE;
host->dma_on = 1;
static void alcor_prepare_data(struct alcor_sdmmc_host *host,
struct mmc_command *cmd)
{
+ struct alcor_pci_priv *priv = host->alcor_pci;
struct mmc_data *data = cmd->data;
if (!data)
if (data->host_cookie != COOKIE_MAPPED)
alcor_prepare_sg_miter(host);
- alcor_trigger_data_transfer(host, true);
+ alcor_write8(priv, 0, AU6601_DATA_XFER_CTRL);
}
static void alcor_send_cmd(struct alcor_sdmmc_host *host,
if (!host->data)
return false;
- alcor_trigger_data_transfer(host, false);
+ alcor_trigger_data_transfer(host);
host->cmd = NULL;
return true;
}
if (!host->data)
alcor_request_complete(host, 1);
else
- alcor_trigger_data_transfer(host, false);
+ alcor_trigger_data_transfer(host);
host->cmd = NULL;
}
break;
case AU6601_INT_READ_BUF_RDY:
alcor_trf_block_pio(host, true);
- if (!host->blocks)
- break;
- alcor_trigger_data_transfer(host, false);
return 1;
case AU6601_INT_WRITE_BUF_RDY:
alcor_trf_block_pio(host, false);
- if (!host->blocks)
- break;
- alcor_trigger_data_transfer(host, false);
return 1;
case AU6601_INT_DMA_END:
if (!host->sg_count)
break;
}
- if (intmask & AU6601_INT_DATA_END)
- return 0;
+ if (intmask & AU6601_INT_DATA_END) {
+ if (!host->dma_on && host->blocks) {
+ alcor_trigger_data_transfer(host);
+ return 1;
+ } else {
+ return 0;
+ }
+ }
return 1;
}
sdhci_reset(host, mask);
}
+#define CMD_ERR_MASK (SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX |\
+ SDHCI_INT_TIMEOUT)
+#define CMD_MASK (CMD_ERR_MASK | SDHCI_INT_RESPONSE)
+
+static u32 sdhci_omap_irq(struct sdhci_host *host, u32 intmask)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+
+ if (omap_host->is_tuning && host->cmd && !host->data_early &&
+ (intmask & CMD_ERR_MASK)) {
+
+ /*
+ * Since we are not resetting data lines during tuning
+ * operation, data error or data complete interrupts
+ * might still arrive. Mark this request as a failure
+ * but still wait for the data interrupt
+ */
+ if (intmask & SDHCI_INT_TIMEOUT)
+ host->cmd->error = -ETIMEDOUT;
+ else
+ host->cmd->error = -EILSEQ;
+
+ host->cmd = NULL;
+
+ /*
+ * Sometimes command error interrupts and command complete
+ * interrupt will arrive together. Clear all command related
+ * interrupts here.
+ */
+ sdhci_writel(host, intmask & CMD_MASK, SDHCI_INT_STATUS);
+ intmask &= ~CMD_MASK;
+ }
+
+ return intmask;
+}
+
static struct sdhci_ops sdhci_omap_ops = {
.set_clock = sdhci_omap_set_clock,
.set_power = sdhci_omap_set_power,
.platform_send_init_74_clocks = sdhci_omap_init_74_clocks,
.reset = sdhci_omap_reset,
.set_uhs_signaling = sdhci_omap_set_uhs_signaling,
+ .irq = sdhci_omap_irq,
};
static int sdhci_omap_set_capabilities(struct sdhci_omap_host *omap_host)
continue;
}
- if (time_after(jiffies, timeo) && !chip_ready(map, adr))
+ /*
+ * We check "time_after" and "!chip_good" before checking "chip_good" to avoid
+ * the failure due to scheduling.
+ */
+ if (time_after(jiffies, timeo) && !chip_good(map, adr, datum))
break;
if (chip_good(map, adr, datum)) {
*/
static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
{
- __be32 paddr = skb_rtable(skb)->rt_gateway;
+ struct rtable *rtable = skb_rtable(skb);
+ __be32 paddr = 0;
struct ddpehdr *ddp;
struct ipddp_route *rt;
struct atalk_addr *our_addr;
+ if (rtable->rt_gw_family == AF_INET)
+ paddr = rtable->rt_gw4;
+
spin_lock(&ipddp_route_lock);
/*
return NOTIFY_DONE;
if (event_dev->flags & IFF_MASTER) {
+ int ret;
+
netdev_dbg(event_dev, "IFF_MASTER\n");
- return bond_master_netdev_event(event, event_dev);
+ ret = bond_master_netdev_event(event, event_dev);
+ if (ret != NOTIFY_DONE)
+ return ret;
}
if (event_dev->flags & IFF_SLAVE) {
int speed, int duplex, int pause,
phy_interface_t mode)
{
+ struct phylink_link_state state;
int err;
if (!chip->info->ops->port_set_link)
return 0;
+ if (!chip->info->ops->port_link_state)
+ return 0;
+
+ err = chip->info->ops->port_link_state(chip, port, &state);
+ if (err)
+ return err;
+
+ /* Has anything actually changed? We don't expect the
+ * interface mode to change without one of the other
+ * parameters also changing
+ */
+ if (state.link == link &&
+ state.speed == speed &&
+ state.duplex == duplex)
+ return 0;
+
/* Port's MAC control must not be changed unless the link is down */
err = chip->info->ops->port_set_link(chip, port, 0);
if (err)
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/rtnetlink.h>
strlcpy(info->version, DRV_VERSION, sizeof(info->version));
}
-static int dummy_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *ts_info)
-{
- ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
-
- ts_info->phc_index = -1;
-
- return 0;
-};
-
static const struct ethtool_ops dummy_ethtool_ops = {
.get_drvinfo = dummy_get_drvinfo,
- .get_ts_info = dummy_get_ts_info,
+ .get_ts_info = ethtool_op_get_ts_info,
};
static void dummy_setup(struct net_device *dev)
/* PTP v2, UDP, any kind of event packet */
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1);
- /* PTP v1, UDP, any kind of event packet */
+ /* Fall through - to PTP v1, UDP, any kind of event packet */
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1);
/* PTP v2, UDP, Sync packet */
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1);
- /* PTP v1, UDP, Sync packet */
+ /* Fall through - to PTP v1, UDP, Sync packet */
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1);
/* PTP v2, UDP, Delay_req packet */
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1);
- /* PTP v1, UDP, Delay_req packet */
+ /* Fall through - to PTP v1, UDP, Delay_req packet */
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1);
adapter->soft_stats.scc += smb->tx_1_col;
adapter->soft_stats.mcc += smb->tx_2_col;
adapter->soft_stats.latecol += smb->tx_late_col;
- adapter->soft_stats.tx_underun += smb->tx_underrun;
+ adapter->soft_stats.tx_underrun += smb->tx_underrun;
adapter->soft_stats.tx_trunc += smb->tx_trunc;
adapter->soft_stats.tx_pause += smb->tx_pause;
{"tx_deferred_ok", ATL1_STAT(soft_stats.deffer)},
{"tx_single_coll_ok", ATL1_STAT(soft_stats.scc)},
{"tx_multi_coll_ok", ATL1_STAT(soft_stats.mcc)},
- {"tx_underun", ATL1_STAT(soft_stats.tx_underun)},
+ {"tx_underrun", ATL1_STAT(soft_stats.tx_underrun)},
{"tx_trunc", ATL1_STAT(soft_stats.tx_trunc)},
{"tx_pause", ATL1_STAT(soft_stats.tx_pause)},
{"rx_pause", ATL1_STAT(soft_stats.rx_pause)},
u64 scc; /* packets TX after a single collision */
u64 mcc; /* packets TX after multiple collisions */
u64 latecol; /* TX packets w/ late collisions */
- u64 tx_underun; /* TX packets aborted due to TX FIFO underrun
+ u64 tx_underrun; /* TX packets aborted due to TX FIFO underrun
* or TRD FIFO underrun */
u64 tx_trunc; /* TX packets truncated due to size > MTU */
u64 rx_pause; /* num Pause packets received. */
netdev->stats.tx_aborted_errors++;
if (txs->late_col)
netdev->stats.tx_window_errors++;
- if (txs->underun)
+ if (txs->underrun)
netdev->stats.tx_fifo_errors++;
} while (1);
unsigned multi_col:1;
unsigned late_col:1;
unsigned abort_col:1;
- unsigned underun:1; /* current packet is aborted
+ unsigned underrun:1; /* current packet is aborted
* due to txram underrun */
unsigned:3; /* reserved */
unsigned update:1; /* always 1'b1 in tx_status_buf */
writel_relaxed(lower_32_bits(addr), d + DESC_ADDR_LO);
}
-static inline void tdma_port_write_desc_addr(struct bcm_sysport_priv *priv,
- struct dma_desc *desc,
- unsigned int port)
-{
- /* Ports are latched, so write upper address first */
- tdma_writel(priv, desc->addr_status_len, TDMA_WRITE_PORT_HI(port));
- tdma_writel(priv, desc->addr_lo, TDMA_WRITE_PORT_LO(port));
-}
-
/* Ethtool operations */
static void bcm_sysport_set_rx_csum(struct net_device *dev,
netdev_features_t wanted)
struct bcm_sysport_tx_ring *ring;
struct bcm_sysport_cb *cb;
struct netdev_queue *txq;
- struct dma_desc *desc;
+ u32 len_status, addr_lo;
unsigned int skb_len;
unsigned long flags;
dma_addr_t mapping;
- u32 len_status;
u16 queue;
int ret;
dma_unmap_addr_set(cb, dma_addr, mapping);
dma_unmap_len_set(cb, dma_len, skb_len);
- /* Fetch a descriptor entry from our pool */
- desc = ring->desc_cpu;
-
- desc->addr_lo = lower_32_bits(mapping);
+ addr_lo = lower_32_bits(mapping);
len_status = upper_32_bits(mapping) & DESC_ADDR_HI_MASK;
len_status |= (skb_len << DESC_LEN_SHIFT);
len_status |= (DESC_SOP | DESC_EOP | TX_STATUS_APP_CRC) <<
ring->curr_desc = 0;
ring->desc_count--;
- /* Ensure write completion of the descriptor status/length
- * in DRAM before the System Port WRITE_PORT register latches
- * the value
- */
- wmb();
- desc->addr_status_len = len_status;
- wmb();
-
- /* Write this descriptor address to the RING write port */
- tdma_port_write_desc_addr(priv, desc, ring->index);
+ /* Ports are latched, so write upper address first */
+ tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index));
+ tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index));
/* Check ring space and update SW control flow */
if (ring->desc_count == 0)
unsigned int index)
{
struct bcm_sysport_tx_ring *ring = &priv->tx_rings[index];
- struct device *kdev = &priv->pdev->dev;
size_t size;
- void *p;
u32 reg;
/* Simple descriptors partitioning for now */
size = 256;
- /* We just need one DMA descriptor which is DMA-able, since writing to
- * the port will allocate a new descriptor in its internal linked-list
- */
- p = dma_alloc_coherent(kdev, sizeof(struct dma_desc), &ring->desc_dma,
- GFP_KERNEL);
- if (!p) {
- netif_err(priv, hw, priv->netdev, "DMA alloc failed\n");
- return -ENOMEM;
- }
-
ring->cbs = kcalloc(size, sizeof(struct bcm_sysport_cb), GFP_KERNEL);
if (!ring->cbs) {
- dma_free_coherent(kdev, sizeof(struct dma_desc),
- ring->desc_cpu, ring->desc_dma);
netif_err(priv, hw, priv->netdev, "CB allocation failed\n");
return -ENOMEM;
}
ring->size = size;
ring->clean_index = 0;
ring->alloc_size = ring->size;
- ring->desc_cpu = p;
ring->desc_count = ring->size;
ring->curr_desc = 0;
napi_enable(&ring->napi);
netif_dbg(priv, hw, priv->netdev,
- "TDMA cfg, size=%d, desc_cpu=%p switch q=%d,port=%d\n",
- ring->size, ring->desc_cpu, ring->switch_queue,
+ "TDMA cfg, size=%d, switch q=%d,port=%d\n",
+ ring->size, ring->switch_queue,
ring->switch_port);
return 0;
unsigned int index)
{
struct bcm_sysport_tx_ring *ring = &priv->tx_rings[index];
- struct device *kdev = &priv->pdev->dev;
u32 reg;
/* Caller should stop the TDMA engine */
kfree(ring->cbs);
ring->cbs = NULL;
-
- if (ring->desc_dma) {
- dma_free_coherent(kdev, sizeof(struct dma_desc),
- ring->desc_cpu, ring->desc_dma);
- ring->desc_dma = 0;
- }
ring->size = 0;
ring->alloc_size = 0;
#define TDMA_DEBUG 0x64c
-/* Transmit/Receive descriptor */
-struct dma_desc {
- u32 addr_status_len;
- u32 addr_lo;
-};
-
/* Number of Receive hardware descriptor words */
#define SP_NUM_HW_RX_DESC_WORDS 1024
#define SP_LT_NUM_HW_RX_DESC_WORDS 256
#define SP_NUM_TX_DESC 1536
#define SP_LT_NUM_TX_DESC 256
-#define WORDS_PER_DESC (sizeof(struct dma_desc) / sizeof(u32))
+#define WORDS_PER_DESC 2
/* Rx/Tx common counter group.*/
struct bcm_sysport_pkt_counters {
struct bcm_sysport_tx_ring {
spinlock_t lock; /* Ring lock for tx reclaim/xmit */
struct napi_struct napi; /* NAPI per tx queue */
- dma_addr_t desc_dma; /* DMA cookie */
unsigned int index; /* Ring index */
unsigned int size; /* Ring current size */
unsigned int alloc_size; /* Ring one-time allocated size */
unsigned int c_index; /* Last consumer index */
unsigned int clean_index; /* Current clean index */
struct bcm_sysport_cb *cbs; /* Transmit control blocks */
- struct dma_desc *desc_cpu; /* CPU view of the descriptor */
struct bcm_sysport_priv *priv; /* private context backpointer */
unsigned long packets; /* packets statistics */
unsigned long bytes; /* bytes statistics */
return 0;
}
+#define BNX2X_P2P_DETECT_PARAM_MASK 0x5F5
+#define BNX2X_P2P_DETECT_RULE_MASK 0x3DBB
+#define BNX2X_PTP_TX_ON_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x6AA)
+#define BNX2X_PTP_TX_ON_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3EEE)
+#define BNX2X_PTP_V1_L4_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x7EE)
+#define BNX2X_PTP_V1_L4_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3FFE)
+#define BNX2X_PTP_V2_L4_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x7EA)
+#define BNX2X_PTP_V2_L4_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3FEE)
+#define BNX2X_PTP_V2_L2_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x6BF)
+#define BNX2X_PTP_V2_L2_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3EFF)
+#define BNX2X_PTP_V2_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x6AA)
+#define BNX2X_PTP_V2_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3EEE)
+
int bnx2x_configure_ptp_filters(struct bnx2x *bp)
{
int port = BP_PORT(bp);
+ u32 param, rule;
int rc;
if (!bp->hwtstamp_ioctl_called)
return 0;
+ param = port ? NIG_REG_P1_TLLH_PTP_PARAM_MASK :
+ NIG_REG_P0_TLLH_PTP_PARAM_MASK;
+ rule = port ? NIG_REG_P1_TLLH_PTP_RULE_MASK :
+ NIG_REG_P0_TLLH_PTP_RULE_MASK;
switch (bp->tx_type) {
case HWTSTAMP_TX_ON:
bp->flags |= TX_TIMESTAMPING_EN;
- REG_WR(bp, port ? NIG_REG_P1_TLLH_PTP_PARAM_MASK :
- NIG_REG_P0_TLLH_PTP_PARAM_MASK, 0x6AA);
- REG_WR(bp, port ? NIG_REG_P1_TLLH_PTP_RULE_MASK :
- NIG_REG_P0_TLLH_PTP_RULE_MASK, 0x3EEE);
+ REG_WR(bp, param, BNX2X_PTP_TX_ON_PARAM_MASK);
+ REG_WR(bp, rule, BNX2X_PTP_TX_ON_RULE_MASK);
break;
case HWTSTAMP_TX_ONESTEP_SYNC:
BNX2X_ERR("One-step timestamping is not supported\n");
return -ERANGE;
}
+ param = port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
+ NIG_REG_P0_LLH_PTP_PARAM_MASK;
+ rule = port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
+ NIG_REG_P0_LLH_PTP_RULE_MASK;
switch (bp->rx_filter) {
case HWTSTAMP_FILTER_NONE:
break;
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
bp->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
/* Initialize PTP detection for UDP/IPv4 events */
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
- NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x7EE);
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
- NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3FFE);
+ REG_WR(bp, param, BNX2X_PTP_V1_L4_PARAM_MASK);
+ REG_WR(bp, rule, BNX2X_PTP_V1_L4_RULE_MASK);
break;
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
bp->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
/* Initialize PTP detection for UDP/IPv4 or UDP/IPv6 events */
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
- NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x7EA);
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
- NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3FEE);
+ REG_WR(bp, param, BNX2X_PTP_V2_L4_PARAM_MASK);
+ REG_WR(bp, rule, BNX2X_PTP_V2_L4_RULE_MASK);
break;
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
bp->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
/* Initialize PTP detection L2 events */
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
- NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x6BF);
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
- NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3EFF);
+ REG_WR(bp, param, BNX2X_PTP_V2_L2_PARAM_MASK);
+ REG_WR(bp, rule, BNX2X_PTP_V2_L2_RULE_MASK);
break;
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
bp->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
/* Initialize PTP detection L2, UDP/IPv4 or UDP/IPv6 events */
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
- NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x6AA);
- REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
- NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3EEE);
+ REG_WR(bp, param, BNX2X_PTP_V2_PARAM_MASK);
+ REG_WR(bp, rule, BNX2X_PTP_V2_RULE_MASK);
break;
}
bnx2x_sample_bulletin(bp);
if (bp->shadow_bulletin.content.valid_bitmap & 1 << VLAN_VALID) {
- BNX2X_ERR("Hypervisor will dicline the request, avoiding\n");
+ BNX2X_ERR("Hypervisor will decline the request, avoiding\n");
rc = -EINVAL;
goto out;
}
tpa_info = &rxr->rx_tpa[agg_id];
if (unlikely(cons != rxr->rx_next_cons)) {
+ netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n",
+ cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
return;
}
}
cons = rxcmp->rx_cmp_opaque;
- rx_buf = &rxr->rx_buf_ring[cons];
- data = rx_buf->data;
- data_ptr = rx_buf->data_ptr;
if (unlikely(cons != rxr->rx_next_cons)) {
int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
+ netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
+ cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
return rc1;
}
+ rx_buf = &rxr->rx_buf_ring[cons];
+ data = rx_buf->data;
+ data_ptr = rx_buf->data_ptr;
prefetch(data_ptr);
misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1);
rx_buf->data = NULL;
if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) {
+ u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2);
+
bnxt_reuse_rx_data(rxr, cons, data);
if (agg_bufs)
bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
rc = -EIO;
+ if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
+ netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
+ bnxt_sched_reset(bp, rxr);
+ }
goto next_rx;
}
case L5CM_RAMROD_CMD_ID_CLOSE: {
struct iscsi_kcqe *l5kcqe = (struct iscsi_kcqe *) kcqe;
- if (l4kcqe->status != 0 || l5kcqe->completion_status != 0) {
- netdev_warn(dev->netdev, "RAMROD CLOSE compl with status 0x%x completion status 0x%x\n",
- l4kcqe->status, l5kcqe->completion_status);
- opcode = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
- /* Fall through */
- } else {
+ if (l4kcqe->status == 0 && l5kcqe->completion_status == 0)
break;
- }
+
+ netdev_warn(dev->netdev, "RAMROD CLOSE compl with status 0x%x completion status 0x%x\n",
+ l4kcqe->status, l5kcqe->completion_status);
+ opcode = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
}
+ /* Fall through */
case L4_KCQE_OPCODE_VALUE_RESET_RECEIVED:
case L4_KCQE_OPCODE_VALUE_CLOSE_COMP:
case L4_KCQE_OPCODE_VALUE_RESET_COMP:
pci_set_power_state(tp->pdev, PCI_D3hot);
}
-static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex)
+static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex)
{
switch (val & MII_TG3_AUX_STAT_SPDMASK) {
case MII_TG3_AUX_STAT_10HALF:
bool current_link_up;
u32 bmsr, val;
u32 lcl_adv, rmt_adv;
- u16 current_speed;
+ u32 current_speed;
u8 current_duplex;
int i, err;
static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset)
{
u32 orig_pause_cfg;
- u16 orig_active_speed;
+ u32 orig_active_speed;
u8 orig_active_duplex;
u32 mac_status;
bool current_link_up;
{
int err = 0;
u32 bmsr, bmcr;
- u16 current_speed = SPEED_UNKNOWN;
+ u32 current_speed = SPEED_UNKNOWN;
u8 current_duplex = DUPLEX_UNKNOWN;
bool current_link_up = false;
u32 local_adv, remote_adv, sgsr;
struct tg3_link_config {
/* Describes what we're trying to get. */
u32 advertising;
- u16 speed;
+ u32 speed;
u8 duplex;
u8 autoneg;
u8 flowctrl;
u8 active_flowctrl;
u8 active_duplex;
- u16 active_speed;
+ u32 active_speed;
u32 rmt_adv;
};
/* First, update TX stats if needed */
if (skb) {
- if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP) &&
+ gem_ptp_do_txstamp(queue, skb, desc) == 0) {
/* skb now belongs to timestamp buffer
* and will be removed later
*/
#define DRV_NAME "nicvf"
#define DRV_VERSION "1.0"
+/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
+ * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
+ * this value, keeping headroom for the 14 byte Ethernet header and two
+ * VLAN tags (for QinQ)
+ */
+#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2)
+
/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
struct nicvf *nic = netdev_priv(netdev);
int orig_mtu = netdev->mtu;
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
+ netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+ netdev->mtu);
+ return -EINVAL;
+ }
+
netdev->mtu = new_mtu;
if (!netif_running(netdev))
bool bpf_attached = false;
int ret = 0;
- /* For now just support only the usual MTU sized frames */
- if (prog && (dev->mtu > 1500)) {
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (prog && dev->mtu > MAX_XDP_MTU) {
netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
dev->mtu);
return -EOPNOTSUPP;
lmac->last_duplex = (an_result >> 1) & 0x1;
switch (speed) {
case 0:
- lmac->last_speed = 10;
+ lmac->last_speed = SPEED_10;
break;
case 1:
- lmac->last_speed = 100;
+ lmac->last_speed = SPEED_100;
break;
case 2:
- lmac->last_speed = 1000;
+ lmac->last_speed = SPEED_1000;
break;
default:
lmac->link_up = false;
!(smu_link & SMU_RX_CTL_STATUS)) {
lmac->link_up = 1;
if (lmac->lmac_type == BGX_MODE_XLAUI)
- lmac->last_speed = 40000;
+ lmac->last_speed = SPEED_40000;
else
- lmac->last_speed = 10000;
- lmac->last_duplex = 1;
+ lmac->last_speed = SPEED_10000;
+ lmac->last_duplex = DUPLEX_FULL;
} else {
lmac->link_up = 0;
lmac->last_speed = SPEED_UNKNOWN;
} else {
/* Default to below link speed and duplex */
lmac->link_up = true;
- lmac->last_speed = 1000;
- lmac->last_duplex = 1;
+ lmac->last_speed = SPEED_1000;
+ lmac->last_duplex = DUPLEX_FULL;
bgx_sgmii_change_link_state(lmac);
return 0;
}
break;
default:
- dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n",
- __func__, status);
+ if (status != CPL_ERR_TCAM_FULL)
+ dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n",
+ __func__, status);
if (ctx) {
if (status == CPL_ERR_TCAM_FULL)
- ctx->result = -EAGAIN;
+ ctx->result = -ENOSPC;
else
ctx->result = -EINVAL;
}
ret = ctx.result;
/* Check if hw returned error for filter creation */
- if (ret) {
- netdev_err(dev, "%s: filter creation err %d\n",
- __func__, ret);
+ if (ret)
goto free_entry;
- }
ch_flower->tc_flower_cookie = cls->cookie;
ch_flower->filter_id = ctx.tid;
break;
}
cpl = (void *)p;
- /*FALLTHROUGH*/
}
+ /* Fall through */
case CPL_SGE_EGR_UPDATE: {
/*
int txq_num, nfrags;
union dma_rwptr rw;
- SKB_FRAG_ASSERT(skb);
-
if (skb->len >= 0x10000)
goto out_drop_free;
.rxnfc_field = RXH_L2DA,
.cls_prot = NET_PROT_ETH,
.cls_field = NH_FLD_ETH_DA,
+ .id = DPAA2_ETH_DIST_ETHDST,
.size = 6,
}, {
.cls_prot = NET_PROT_ETH,
.cls_field = NH_FLD_ETH_SA,
+ .id = DPAA2_ETH_DIST_ETHSRC,
.size = 6,
}, {
/* This is the last ethertype field parsed:
*/
.cls_prot = NET_PROT_ETH,
.cls_field = NH_FLD_ETH_TYPE,
+ .id = DPAA2_ETH_DIST_ETHTYPE,
.size = 2,
}, {
/* VLAN header */
.rxnfc_field = RXH_VLAN,
.cls_prot = NET_PROT_VLAN,
.cls_field = NH_FLD_VLAN_TCI,
+ .id = DPAA2_ETH_DIST_VLAN,
.size = 2,
}, {
/* IP header */
.rxnfc_field = RXH_IP_SRC,
.cls_prot = NET_PROT_IP,
.cls_field = NH_FLD_IP_SRC,
+ .id = DPAA2_ETH_DIST_IPSRC,
.size = 4,
}, {
.rxnfc_field = RXH_IP_DST,
.cls_prot = NET_PROT_IP,
.cls_field = NH_FLD_IP_DST,
+ .id = DPAA2_ETH_DIST_IPDST,
.size = 4,
}, {
.rxnfc_field = RXH_L3_PROTO,
.cls_prot = NET_PROT_IP,
.cls_field = NH_FLD_IP_PROTO,
+ .id = DPAA2_ETH_DIST_IPPROTO,
.size = 1,
}, {
/* Using UDP ports, this is functionally equivalent to raw
.rxnfc_field = RXH_L4_B_0_1,
.cls_prot = NET_PROT_UDP,
.cls_field = NH_FLD_UDP_PORT_SRC,
+ .id = DPAA2_ETH_DIST_L4SRC,
.size = 2,
}, {
.rxnfc_field = RXH_L4_B_2_3,
.cls_prot = NET_PROT_UDP,
.cls_field = NH_FLD_UDP_PORT_DST,
+ .id = DPAA2_ETH_DIST_L4DST,
.size = 2,
},
};
}
/* Size of the Rx flow classification key */
-int dpaa2_eth_cls_key_size(void)
+int dpaa2_eth_cls_key_size(u64 fields)
{
int i, size = 0;
- for (i = 0; i < ARRAY_SIZE(dist_fields); i++)
+ for (i = 0; i < ARRAY_SIZE(dist_fields); i++) {
+ if (!(fields & dist_fields[i].id))
+ continue;
size += dist_fields[i].size;
+ }
return size;
}
return 0;
}
+/* Prune unused fields from the classification rule.
+ * Used when masking is not supported.
+ *
+ * key_mem is treated as holding one value per dist_fields[] entry, laid out
+ * back to back in table order. Fields whose id bit is set in @fields are
+ * packed towards the start of the buffer, keeping their relative order.
+ * Bytes past the packed fields are left as they were.
+ */
+void dpaa2_eth_cls_trim_rule(void *key_mem, u64 fields)
+{
+	int off = 0, new_off = 0;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(dist_fields); i++) {
+		size = dist_fields[i].size;
+		if (dist_fields[i].id & fields) {
+			/* Compaction is done in place and new_off <= off, so
+			 * source and destination may overlap (e.g. a 2-byte
+			 * field following a dropped 1-byte one) or coincide.
+			 * memcpy() is undefined in that case, memmove() is not.
+			 */
+			memmove(key_mem + new_off, key_mem + off, size);
+			new_off += size;
+		}
+		off += size;
+	}
+}
+
/* Set Rx distribution (hash or flow classification) key
* flags is a combination of RXH_ bits
*/
struct dpkg_extract *key =
&cls_cfg.extracts[cls_cfg.num_extracts];
- /* For Rx hashing key we set only the selected fields.
- * For Rx flow classification key we set all supported fields
+ /* For both Rx hashing and classification keys
+ * we set only the selected fields.
*/
- if (type == DPAA2_ETH_RX_DIST_HASH) {
- if (!(flags & dist_fields[i].rxnfc_field))
- continue;
+ if (!(flags & dist_fields[i].id))
+ continue;
+ if (type == DPAA2_ETH_RX_DIST_HASH)
rx_hash_fields |= dist_fields[i].rxnfc_field;
- }
if (cls_cfg.num_extracts >= DPKG_MAX_NUM_OF_EXTRACTS) {
dev_err(dev, "error adding key extraction rule, too many rules?\n");
int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+ u64 key = 0;
+ int i;
if (!dpaa2_eth_hash_enabled(priv))
return -EOPNOTSUPP;
- return dpaa2_eth_set_dist_key(net_dev, DPAA2_ETH_RX_DIST_HASH, flags);
+ for (i = 0; i < ARRAY_SIZE(dist_fields); i++)
+ if (dist_fields[i].rxnfc_field & flags)
+ key |= dist_fields[i].id;
+
+ return dpaa2_eth_set_dist_key(net_dev, DPAA2_ETH_RX_DIST_HASH, key);
}
-static int dpaa2_eth_set_cls(struct dpaa2_eth_priv *priv)
+/* Set the Rx flow classification key; @flags is handed unchanged to
+ * dpaa2_eth_set_dist_key() with the DPAA2_ETH_RX_DIST_CLS key type.
+ */
+int dpaa2_eth_set_cls(struct net_device *net_dev, u64 flags)
+{
+	int err;
+
+	err = dpaa2_eth_set_dist_key(net_dev, DPAA2_ETH_RX_DIST_CLS, flags);
+
+	return err;
+}
+
+static int dpaa2_eth_set_default_cls(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
+ int err;
/* Check if we actually support Rx flow classification */
if (dpaa2_eth_has_legacy_dist(priv)) {
return -EOPNOTSUPP;
}
- if (priv->dpni_attrs.options & DPNI_OPT_NO_FS ||
- !(priv->dpni_attrs.options & DPNI_OPT_HAS_KEY_MASKING)) {
+ if (!dpaa2_eth_fs_enabled(priv)) {
dev_dbg(dev, "Rx cls disabled in DPNI options\n");
return -EOPNOTSUPP;
}
return -EOPNOTSUPP;
}
+ /* If there is no support for masking in the classification table,
+ * we don't set a default key, as it will depend on the rules
+ * added by the user at runtime.
+ */
+ if (!dpaa2_eth_fs_mask_enabled(priv))
+ goto out;
+
+ err = dpaa2_eth_set_cls(priv->net_dev, DPAA2_ETH_DIST_ALL);
+ if (err)
+ return err;
+
+out:
priv->rx_cls_enabled = 1;
- return dpaa2_eth_set_dist_key(priv->net_dev, DPAA2_ETH_RX_DIST_CLS, 0);
+ return 0;
}
/* Bind the DPNI to its needed objects and resources: buffer pool, DPIOs,
/* Configure the flow classification key; it includes all
* supported header fields and cannot be modified at runtime
*/
- err = dpaa2_eth_set_cls(priv);
+ err = dpaa2_eth_set_default_cls(priv);
if (err && err != -EOPNOTSUPP)
dev_err(dev, "Failed to configure Rx classification key\n");
enum net_prot cls_prot;
int cls_field;
int size;
+ u64 id;
};
struct dpaa2_eth_cls_rule {
/* enabled ethtool hashing bits */
u64 rx_hash_fields;
+ u64 rx_cls_fields;
struct dpaa2_eth_cls_rule *cls_rules;
u8 rx_cls_enabled;
struct bpf_prog *xdp_prog;
(dpaa2_eth_cmp_dpni_ver((priv), DPNI_RX_DIST_KEY_VER_MAJOR, \
DPNI_RX_DIST_KEY_VER_MINOR) < 0)
+#define dpaa2_eth_fs_enabled(priv) \
+ (!((priv)->dpni_attrs.options & DPNI_OPT_NO_FS))
+
+#define dpaa2_eth_fs_mask_enabled(priv) \
+ ((priv)->dpni_attrs.options & DPNI_OPT_HAS_KEY_MASKING)
+
#define dpaa2_eth_fs_count(priv) \
((priv)->dpni_attrs.fs_entries)
DPAA2_ETH_RX_DIST_CLS
};
+/* Unique IDs for the supported Rx classification header fields.
+ * Each ID is a single bit, so a set of fields is carried as a u64 bitmask.
+ */
+#define DPAA2_ETH_DIST_ETHDST		BIT(0)
+#define DPAA2_ETH_DIST_ETHSRC		BIT(1)
+#define DPAA2_ETH_DIST_ETHTYPE		BIT(2)
+#define DPAA2_ETH_DIST_VLAN		BIT(3)
+#define DPAA2_ETH_DIST_IPSRC		BIT(4)
+#define DPAA2_ETH_DIST_IPDST		BIT(5)
+#define DPAA2_ETH_DIST_IPPROTO		BIT(6)
+#define DPAA2_ETH_DIST_L4SRC		BIT(7)
+#define DPAA2_ETH_DIST_L4DST		BIT(8)
+/* All field bits set; 64 bits wide to match the u64 field masks */
+#define DPAA2_ETH_DIST_ALL		(~0ULL)
+
static inline
unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv,
struct sk_buff *skb)
}
int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags);
-int dpaa2_eth_cls_key_size(void);
+int dpaa2_eth_set_cls(struct net_device *net_dev, u64 key);
+int dpaa2_eth_cls_key_size(u64 key);
int dpaa2_eth_cls_fld_off(int prot, int field);
+void dpaa2_eth_cls_trim_rule(void *key_mem, u64 fields);
#endif /* __DPAA2_H */
}
static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
- void *key, void *mask)
+ void *key, void *mask, u64 *fields)
{
int off;
off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_TYPE);
*(__be16 *)(key + off) = eth_value->h_proto;
*(__be16 *)(mask + off) = eth_mask->h_proto;
+ *fields |= DPAA2_ETH_DIST_ETHTYPE;
}
if (!is_zero_ether_addr(eth_mask->h_source)) {
off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_SA);
ether_addr_copy(key + off, eth_value->h_source);
ether_addr_copy(mask + off, eth_mask->h_source);
+ *fields |= DPAA2_ETH_DIST_ETHSRC;
}
if (!is_zero_ether_addr(eth_mask->h_dest)) {
off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_DA);
ether_addr_copy(key + off, eth_value->h_dest);
ether_addr_copy(mask + off, eth_mask->h_dest);
+ *fields |= DPAA2_ETH_DIST_ETHDST;
}
return 0;
static int prep_uip_rule(struct ethtool_usrip4_spec *uip_value,
struct ethtool_usrip4_spec *uip_mask,
- void *key, void *mask)
+ void *key, void *mask, u64 *fields)
{
int off;
u32 tmp_value, tmp_mask;
off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_SRC);
*(__be32 *)(key + off) = uip_value->ip4src;
*(__be32 *)(mask + off) = uip_mask->ip4src;
+ *fields |= DPAA2_ETH_DIST_IPSRC;
}
if (uip_mask->ip4dst) {
off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_DST);
*(__be32 *)(key + off) = uip_value->ip4dst;
*(__be32 *)(mask + off) = uip_mask->ip4dst;
+ *fields |= DPAA2_ETH_DIST_IPDST;
}
if (uip_mask->proto) {
off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_PROTO);
*(u8 *)(key + off) = uip_value->proto;
*(u8 *)(mask + off) = uip_mask->proto;
+ *fields |= DPAA2_ETH_DIST_IPPROTO;
}
if (uip_mask->l4_4_bytes) {
off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_SRC);
*(__be16 *)(key + off) = htons(tmp_value >> 16);
*(__be16 *)(mask + off) = htons(tmp_mask >> 16);
+ *fields |= DPAA2_ETH_DIST_L4SRC;
off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_DST);
*(__be16 *)(key + off) = htons(tmp_value & 0xFFFF);
*(__be16 *)(mask + off) = htons(tmp_mask & 0xFFFF);
+ *fields |= DPAA2_ETH_DIST_L4DST;
}
/* Only apply the rule for IPv4 frames */
off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_TYPE);
*(__be16 *)(key + off) = htons(ETH_P_IP);
*(__be16 *)(mask + off) = htons(0xFFFF);
+ *fields |= DPAA2_ETH_DIST_ETHTYPE;
return 0;
}
static int prep_l4_rule(struct ethtool_tcpip4_spec *l4_value,
struct ethtool_tcpip4_spec *l4_mask,
- void *key, void *mask, u8 l4_proto)
+ void *key, void *mask, u8 l4_proto, u64 *fields)
{
int off;
off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_SRC);
*(__be32 *)(key + off) = l4_value->ip4src;
*(__be32 *)(mask + off) = l4_mask->ip4src;
+ *fields |= DPAA2_ETH_DIST_IPSRC;
}
if (l4_mask->ip4dst) {
off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_DST);
*(__be32 *)(key + off) = l4_value->ip4dst;
*(__be32 *)(mask + off) = l4_mask->ip4dst;
+ *fields |= DPAA2_ETH_DIST_IPDST;
}
if (l4_mask->psrc) {
off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_SRC);
*(__be16 *)(key + off) = l4_value->psrc;
*(__be16 *)(mask + off) = l4_mask->psrc;
+ *fields |= DPAA2_ETH_DIST_L4SRC;
}
if (l4_mask->pdst) {
off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_DST);
*(__be16 *)(key + off) = l4_value->pdst;
*(__be16 *)(mask + off) = l4_mask->pdst;
+ *fields |= DPAA2_ETH_DIST_L4DST;
}
/* Only apply the rule for IPv4 frames with the specified L4 proto */
off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_TYPE);
*(__be16 *)(key + off) = htons(ETH_P_IP);
*(__be16 *)(mask + off) = htons(0xFFFF);
+ *fields |= DPAA2_ETH_DIST_ETHTYPE;
off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_PROTO);
*(u8 *)(key + off) = l4_proto;
*(u8 *)(mask + off) = 0xFF;
+ *fields |= DPAA2_ETH_DIST_IPPROTO;
return 0;
}
static int prep_ext_rule(struct ethtool_flow_ext *ext_value,
struct ethtool_flow_ext *ext_mask,
- void *key, void *mask)
+ void *key, void *mask, u64 *fields)
{
int off;
off = dpaa2_eth_cls_fld_off(NET_PROT_VLAN, NH_FLD_VLAN_TCI);
*(__be16 *)(key + off) = ext_value->vlan_tci;
*(__be16 *)(mask + off) = ext_mask->vlan_tci;
+ *fields |= DPAA2_ETH_DIST_VLAN;
}
return 0;
static int prep_mac_ext_rule(struct ethtool_flow_ext *ext_value,
struct ethtool_flow_ext *ext_mask,
- void *key, void *mask)
+ void *key, void *mask, u64 *fields)
{
int off;
off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_DA);
ether_addr_copy(key + off, ext_value->h_dest);
ether_addr_copy(mask + off, ext_mask->h_dest);
+ *fields |= DPAA2_ETH_DIST_ETHDST;
}
return 0;
}
-static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask)
+static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask,
+ u64 *fields)
{
int err;
switch (fs->flow_type & 0xFF) {
case ETHER_FLOW:
err = prep_eth_rule(&fs->h_u.ether_spec, &fs->m_u.ether_spec,
- key, mask);
+ key, mask, fields);
break;
case IP_USER_FLOW:
err = prep_uip_rule(&fs->h_u.usr_ip4_spec,
- &fs->m_u.usr_ip4_spec, key, mask);
+ &fs->m_u.usr_ip4_spec, key, mask, fields);
break;
case TCP_V4_FLOW:
err = prep_l4_rule(&fs->h_u.tcp_ip4_spec, &fs->m_u.tcp_ip4_spec,
- key, mask, IPPROTO_TCP);
+ key, mask, IPPROTO_TCP, fields);
break;
case UDP_V4_FLOW:
err = prep_l4_rule(&fs->h_u.udp_ip4_spec, &fs->m_u.udp_ip4_spec,
- key, mask, IPPROTO_UDP);
+ key, mask, IPPROTO_UDP, fields);
break;
case SCTP_V4_FLOW:
err = prep_l4_rule(&fs->h_u.sctp_ip4_spec,
&fs->m_u.sctp_ip4_spec, key, mask,
- IPPROTO_SCTP);
+ IPPROTO_SCTP, fields);
break;
default:
return -EOPNOTSUPP;
return err;
if (fs->flow_type & FLOW_EXT) {
- err = prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask);
+ err = prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, fields);
if (err)
return err;
}
if (fs->flow_type & FLOW_MAC_EXT) {
- err = prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key, mask);
+ err = prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key, mask,
+ fields);
if (err)
return err;
}
struct dpni_rule_cfg rule_cfg = { 0 };
struct dpni_fs_action_cfg fs_act = { 0 };
dma_addr_t key_iova;
+ u64 fields = 0;
void *key_buf;
int err;
fs->ring_cookie >= dpaa2_eth_queue_count(priv))
return -EINVAL;
- rule_cfg.key_size = dpaa2_eth_cls_key_size();
+ rule_cfg.key_size = dpaa2_eth_cls_key_size(DPAA2_ETH_DIST_ALL);
/* allocate twice the key size, for the actual key and for mask */
key_buf = kzalloc(rule_cfg.key_size * 2, GFP_KERNEL);
return -ENOMEM;
/* Fill the key and mask memory areas */
- err = prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size);
+ err = prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size, &fields);
if (err)
goto free_mem;
+ if (!dpaa2_eth_fs_mask_enabled(priv)) {
+ /* Masking allows us to configure a maximal key during init and
+ * use it for all flow steering rules. Without it, we include
+ * in the key only the fields actually used, so we need to
+ * extract the others from the final key buffer.
+ *
+ * Program the FS key if needed, or return error if previously
+ * set key can't be used for the current rule. User needs to
+ * delete existing rules in this case to allow for the new one.
+ */
+ if (!priv->rx_cls_fields) {
+ err = dpaa2_eth_set_cls(net_dev, fields);
+ if (err)
+ goto free_mem;
+
+ priv->rx_cls_fields = fields;
+ } else if (priv->rx_cls_fields != fields) {
+ netdev_err(net_dev, "No support for multiple FS keys, need to delete existing rules\n");
+ err = -EOPNOTSUPP;
+ goto free_mem;
+ }
+
+ dpaa2_eth_cls_trim_rule(key_buf, fields);
+ rule_cfg.key_size = dpaa2_eth_cls_key_size(fields);
+ }
+
key_iova = dma_map_single(dev, key_buf, rule_cfg.key_size * 2,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, key_iova)) {
}
rule_cfg.key_iova = key_iova;
- rule_cfg.mask_iova = key_iova + rule_cfg.key_size;
+ if (dpaa2_eth_fs_mask_enabled(priv))
+ rule_cfg.mask_iova = key_iova + rule_cfg.key_size;
if (add) {
if (fs->ring_cookie == RX_CLS_FLOW_DISC)
return err;
}
+/* Count the entries of priv->cls_rules[] that are flagged as in use */
+static int num_rules(struct dpaa2_eth_priv *priv)
+{
+	int idx = 0, used = 0;
+
+	while (idx < dpaa2_eth_fs_count(priv)) {
+		if (priv->cls_rules[idx].in_use)
+			used++;
+		idx++;
+	}
+
+	return used;
+}
+
static int update_cls_rule(struct net_device *net_dev,
struct ethtool_rx_flow_spec *new_fs,
int location)
return err;
rule->in_use = 0;
+
+ if (!dpaa2_eth_fs_mask_enabled(priv) && !num_rules(priv))
+ priv->rx_cls_fields = 0;
}
/* If no new entry to add, return here */
break;
case ETHTOOL_GRXCLSRLCNT:
rxnfc->rule_cnt = 0;
- for (i = 0; i < max_rules; i++)
- if (priv->cls_rules[i].in_use)
- rxnfc->rule_cnt++;
+ rxnfc->rule_cnt = num_rules(priv);
rxnfc->data = max_rules;
break;
case ETHTOOL_GRXCLSRULE:
int ret;
if (enable) {
- ret = clk_prepare_enable(fep->clk_ahb);
- if (ret)
- return ret;
-
ret = clk_prepare_enable(fep->clk_enet_out);
if (ret)
- goto failed_clk_enet_out;
+ return ret;
if (fep->clk_ptp) {
mutex_lock(&fep->ptp_clk_mutex);
phy_reset_after_clk_enable(ndev->phydev);
} else {
- clk_disable_unprepare(fep->clk_ahb);
clk_disable_unprepare(fep->clk_enet_out);
if (fep->clk_ptp) {
mutex_lock(&fep->ptp_clk_mutex);
failed_clk_ptp:
if (fep->clk_enet_out)
clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
- clk_disable_unprepare(fep->clk_ahb);
return ret;
}
ret = clk_prepare_enable(fep->clk_ipg);
if (ret)
goto failed_clk_ipg;
+ ret = clk_prepare_enable(fep->clk_ahb);
+ if (ret)
+ goto failed_clk_ahb;
fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy");
if (!IS_ERR(fep->reg_phy)) {
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
failed_regulator:
+ clk_disable_unprepare(fep->clk_ahb);
+failed_clk_ahb:
+ clk_disable_unprepare(fep->clk_ipg);
failed_clk_ipg:
fec_enet_clk_enable(ndev, false);
failed_clk:
struct net_device *ndev = dev_get_drvdata(dev);
struct fec_enet_private *fep = netdev_priv(ndev);
+ clk_disable_unprepare(fep->clk_ahb);
clk_disable_unprepare(fep->clk_ipg);
return 0;
{
struct net_device *ndev = dev_get_drvdata(dev);
struct fec_enet_private *fep = netdev_priv(ndev);
+ int ret;
- return clk_prepare_enable(fep->clk_ipg);
+ ret = clk_prepare_enable(fep->clk_ahb);
+ if (ret)
+ return ret;
+ ret = clk_prepare_enable(fep->clk_ipg);
+ if (ret)
+ goto failed_clk_ipg;
+
+ return 0;
+
+failed_clk_ipg:
+ clk_disable_unprepare(fep->clk_ahb);
+ return ret;
}
static const struct dev_pm_ops fec_pm_ops = {
HCLGE_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */
HCLGE_MBX_LINK_STAT_MODE, /* (PF -> VF) link mode has changed */
HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */
+ HLCGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */
HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
HCLGE_MBX_VLAN_FILTER = 0, /* set vlan filter */
HCLGE_MBX_VLAN_TX_OFF_CFG, /* set tx side vlan offload */
HCLGE_MBX_VLAN_RX_OFF_CFG, /* set rx side vlan offload */
+ HCLGE_MBX_PORT_BASE_VLAN_CFG, /* set port based vlan configuration */
+ HCLGE_MBX_GET_PORT_BASE_VLAN_STATE, /* get port based vlan state */
};
#define HCLGE_MBX_MAX_MSG_SIZE 16
struct hclge_mbx_vf_to_pf_cmd {
u8 rsv;
u8 mbx_src_vfid; /* Auto filled by IMP */
- u8 rsv1[2];
+ u8 mbx_need_resp;
+ u8 rsv1[1];
u8 msg_len;
u8 rsv2[3];
u8 msg[HCLGE_MBX_MAX_MSG_SIZE];
};
+#define HCLGE_MBX_NEED_RESP_BIT BIT(0)
+
struct hclge_mbx_pf_to_vf_cmd {
u8 dest_vfid;
u8 rsv[3];
struct hclgevf_dev *hdev;
u32 head;
u32 tail;
- u32 count;
+ atomic_t count;
u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
};
HNAE3_FLR_DONE,
};
+enum hnae3_port_base_vlan_state {
+ HNAE3_PORT_BASE_VLAN_DISABLE,
+ HNAE3_PORT_BASE_VLAN_ENABLE,
+ HNAE3_PORT_BASE_VLAN_MODIFY,
+ HNAE3_PORT_BASE_VLAN_NOCHANGE,
+};
+
struct hnae3_vector_info {
u8 __iomem *io_addr;
int vector;
void (*update_stats)(struct hnae3_handle *handle,
struct net_device_stats *net_stats);
void (*get_stats)(struct hnae3_handle *handle, u64 *data);
-
+ void (*get_mac_pause_stats)(struct hnae3_handle *handle, u64 *tx_cnt,
+ u64 *rx_cnt);
void (*get_strings)(struct hnae3_handle *handle,
u32 stringset, u8 *data);
int (*get_sset_count)(struct hnae3_handle *handle, int stringset);
u32 numa_node_mask; /* for multi-chip support */
+ enum hnae3_port_base_vlan_state port_base_vlan_state;
+
u8 netdev_flags;
struct dentry *hnae3_dbgfs;
+
+ /* Network interface message level enabled bits */
+ u32 msg_enable;
};
#define hnae3_set_field(origin, mask, shift, val) \
dev_info(&h->pdev->dev, "queue info [number]\n");
dev_info(&h->pdev->dev, "queue map\n");
dev_info(&h->pdev->dev, "bd info [q_num] <bd index>\n");
+
+ if (!hns3_is_phys_func(h->pdev))
+ return;
+
dev_info(&h->pdev->dev, "dump fd tcam\n");
dev_info(&h->pdev->dev, "dump tc\n");
dev_info(&h->pdev->dev, "dump tm map [q_num]\n");
dev_info(&h->pdev->dev, "dump qos pri map\n");
dev_info(&h->pdev->dev, "dump qos buf cfg\n");
dev_info(&h->pdev->dev, "dump mng tbl\n");
+ dev_info(&h->pdev->dev, "dump reset info\n");
+ dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
+ dev_info(&h->pdev->dev, "dump mac tnl status\n");
memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
strncat(printf_buf, "dump reg [[bios common] [ssu <prt_id>]",
ret = hns3_dbg_bd_info(handle, cmd_buf);
else if (handle->ae_algo->ops->dbg_run_cmd)
ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
+ else
+ ret = -EOPNOTSUPP;
if (ret)
hns3_dbg_help(handle);
static const char hns3_copyright[] = "Copyright (c) 2017 Huawei Corporation.";
static struct hnae3_client client;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, " Network interface message level setting");
+
+#define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
+ NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
+
/* hns3_pci_tbl - PCI Device ID Table
*
* Last entry must be all 0s
{
#define HNS3_TX_VLAN_PRIO_SHIFT 13
+ struct hnae3_handle *handle = tx_ring->tqp->handle;
+
+ /* Due to a HW limitation, if port based VLAN insertion is enabled, only
+ * one VLAN header is allowed in the skb; otherwise it will cause a RAS
+ * error.
+ */
+ if (unlikely(skb_vlan_tagged_multi(skb) &&
+ handle->port_base_vlan_state ==
+ HNAE3_PORT_BASE_VLAN_ENABLE))
+ return -EINVAL;
+
if (skb->protocol == htons(ETH_P_8021Q) &&
!(tx_ring->tqp->handle->kinfo.netdev->features &
NETIF_F_HW_VLAN_CTAG_TX)) {
* and use inner_vtag in one tag case.
*/
if (skb->protocol == htons(ETH_P_8021Q)) {
- hns3_set_field(*out_vlan_flag, HNS3_TXD_OVLAN_B, 1);
- *out_vtag = vlan_tag;
+ if (handle->port_base_vlan_state ==
+ HNAE3_PORT_BASE_VLAN_DISABLE){
+ hns3_set_field(*out_vlan_flag,
+ HNS3_TXD_OVLAN_B, 1);
+ *out_vtag = vlan_tag;
+ } else {
+ hns3_set_field(*inner_vlan_flag,
+ HNS3_TXD_VLAN_B, 1);
+ *inner_vtag = vlan_tag;
+ }
} else {
hns3_set_field(*inner_vlan_flag, HNS3_TXD_VLAN_B, 1);
*inner_vtag = vlan_tag;
static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
{
struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = hns3_get_handle(ndev);
struct hns3_enet_ring *tx_ring = NULL;
+ struct napi_struct *napi;
int timeout_queue = 0;
int hw_head, hw_tail;
+ int fbd_num, fbd_oft;
+ int ebd_num, ebd_oft;
+ int bd_num, bd_err;
+ int ring_en, tc;
int i;
/* Find the stopped queue the same way the stack does */
- for (i = 0; i < ndev->real_num_tx_queues; i++) {
+ for (i = 0; i < ndev->num_tx_queues; i++) {
struct netdev_queue *q;
unsigned long trans_start;
return false;
}
+ priv->tx_timeout_count++;
+
tx_ring = priv->ring_data[timeout_queue].ring;
+ napi = &tx_ring->tqp_vector->napi;
+
+ netdev_info(ndev,
+ "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, napi state: %lu\n",
+ priv->tx_timeout_count, timeout_queue, tx_ring->next_to_use,
+ tx_ring->next_to_clean, napi->state);
+
+ netdev_info(ndev,
+ "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu\n",
+ tx_ring->stats.tx_pkts, tx_ring->stats.tx_bytes,
+ tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt);
+
+ netdev_info(ndev,
+ "seg_pkt_cnt: %llu, tx_err_cnt: %llu, restart_queue: %llu, tx_busy: %llu\n",
+ tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_err_cnt,
+ tx_ring->stats.restart_queue, tx_ring->stats.tx_busy);
+
+ /* When the MAC receives many pause frames continuously, it is unable to
+ * send packets, which may cause a tx timeout.
+ */
+ if (h->ae_algo->ops->update_stats &&
+ h->ae_algo->ops->get_mac_pause_stats) {
+ u64 tx_pause_cnt, rx_pause_cnt;
+
+ h->ae_algo->ops->update_stats(h, &ndev->stats);
+ h->ae_algo->ops->get_mac_pause_stats(h, &tx_pause_cnt,
+ &rx_pause_cnt);
+ netdev_info(ndev, "tx_pause_cnt: %llu, rx_pause_cnt: %llu\n",
+ tx_pause_cnt, rx_pause_cnt);
+ }
hw_head = readl_relaxed(tx_ring->tqp->io_base +
HNS3_RING_TX_RING_HEAD_REG);
hw_tail = readl_relaxed(tx_ring->tqp->io_base +
HNS3_RING_TX_RING_TAIL_REG);
+ fbd_num = readl_relaxed(tx_ring->tqp->io_base +
+ HNS3_RING_TX_RING_FBDNUM_REG);
+ fbd_oft = readl_relaxed(tx_ring->tqp->io_base +
+ HNS3_RING_TX_RING_OFFSET_REG);
+ ebd_num = readl_relaxed(tx_ring->tqp->io_base +
+ HNS3_RING_TX_RING_EBDNUM_REG);
+ ebd_oft = readl_relaxed(tx_ring->tqp->io_base +
+ HNS3_RING_TX_RING_EBD_OFFSET_REG);
+ bd_num = readl_relaxed(tx_ring->tqp->io_base +
+ HNS3_RING_TX_RING_BD_NUM_REG);
+ bd_err = readl_relaxed(tx_ring->tqp->io_base +
+ HNS3_RING_TX_RING_BD_ERR_REG);
+ ring_en = readl_relaxed(tx_ring->tqp->io_base + HNS3_RING_EN_REG);
+ tc = readl_relaxed(tx_ring->tqp->io_base + HNS3_RING_TX_RING_TC_REG);
+
netdev_info(ndev,
- "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, HW_HEAD: 0x%x, HW_TAIL: 0x%x, INT: 0x%x\n",
- priv->tx_timeout_count,
- timeout_queue,
- tx_ring->next_to_use,
- tx_ring->next_to_clean,
- hw_head,
- hw_tail,
+ "BD_NUM: 0x%x HW_HEAD: 0x%x, HW_TAIL: 0x%x, BD_ERR: 0x%x, INT: 0x%x\n",
+ bd_num, hw_head, hw_tail, bd_err,
readl(tx_ring->tqp_vector->mask_addr));
+ netdev_info(ndev,
+ "RING_EN: 0x%x, TC: 0x%x, FBD_NUM: 0x%x FBD_OFT: 0x%x, EBD_NUM: 0x%x, EBD_OFT: 0x%x\n",
+ ring_en, tc, fbd_num, fbd_oft, ebd_num, ebd_oft);
return true;
}
if (!hns3_get_tx_timeo_queue_info(ndev))
return;
- priv->tx_timeout_count++;
-
/* request the reset, and let the hclge to determine
* which reset level should be done
*/
.ndo_set_vf_vlan = hns3_ndo_set_vf_vlan,
};
-static bool hns3_is_phys_func(struct pci_dev *pdev)
+bool hns3_is_phys_func(struct pci_dev *pdev)
{
u32 dev_id = pdev->device;
static void hns3_nic_reclaim_one_desc(struct hns3_enet_ring *ring, int *bytes,
int *pkts)
{
- struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
+ int ntc = ring->next_to_clean;
+ struct hns3_desc_cb *desc_cb;
+ desc_cb = &ring->desc_cb[ntc];
(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
(*bytes) += desc_cb->length;
/* desc_cb will be cleaned, after hnae3_free_buffer_detach*/
- hns3_free_buffer_detach(ring, ring->next_to_clean);
+ hns3_free_buffer_detach(ring, ntc);
- ring_ptr_move_fw(ring, next_to_clean);
+ if (++ntc == ring->desc_num)
+ ntc = 0;
+
+ /* This smp_store_release() pairs with smp_load_acquire() in
+ * ring_space called by hns3_nic_net_xmit.
+ */
+ smp_store_release(&ring->next_to_clean, ntc);
}
static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
}
}
+/* hns3_gro_complete - fill in the GSO fields of an skb that has a GRO count
+ * @skb: received packet; skb->protocol is read as the outermost ethertype
+ *
+ * Steps over any 802.1Q headers found at the start of skb->data, then over
+ * a fixed-size IPv4 or IPv6 header (sizeof(struct iphdr) or
+ * sizeof(struct ipv6hdr)), and treats what follows as the TCP header.
+ * NAPI_GRO_CB(skb)->count is copied to gso_segs, SKB_GSO_TCP_ECN is added
+ * to gso_type when the TCP CWR bit is set, and ip_summed is set to
+ * CHECKSUM_UNNECESSARY.
+ *
+ * Return: 0 on success, -EFAULT when a VLAN header does not fit in the
+ * linear part of the skb or the L3 protocol is neither IPv4 nor IPv6.
+ */
+static int hns3_gro_complete(struct sk_buff *skb)
+{
+	__be16 proto = skb->protocol;
+	struct tcphdr *tcph;
+	int hdr_off = 0;
+
+	/* Walk stacked VLAN headers until a non-VLAN ethertype shows up */
+	for (; proto == htons(ETH_P_8021Q); hdr_off += VLAN_HLEN) {
+		struct vlan_hdr *vlan;
+
+		if ((hdr_off + VLAN_HLEN) > skb_headlen(skb))
+			return -EFAULT;
+
+		vlan = (struct vlan_hdr *)(skb->data + hdr_off);
+		proto = vlan->h_vlan_encapsulated_proto;
+	}
+
+	/* Only IPv4 and IPv6 are handled; anything else is reported */
+	if (proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6)) {
+		netdev_err(skb->dev,
+			   "Error: FW GRO supports only IPv4/IPv6, not 0x%04x, depth: %d\n",
+			   be16_to_cpu(proto), hdr_off);
+		return -EFAULT;
+	}
+
+	if (proto == htons(ETH_P_IP))
+		hdr_off += sizeof(struct iphdr);
+	else
+		hdr_off += sizeof(struct ipv6hdr);
+
+	tcph = (struct tcphdr *)(skb->data + hdr_off);
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	if (tcph->cwr)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	return 0;
+}
+
static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
- struct hns3_desc *desc)
+ u32 l234info, u32 bd_base_info)
{
struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
int l3_type, l4_type;
- u32 bd_base_info;
int ol4_type;
- u32 l234info;
-
- bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
- l234info = le32_to_cpu(desc->rx.l234_info);
skb->ip_summed = CHECKSUM_NONE;
if (!(netdev->features & NETIF_F_RXCSUM))
return;
- /* We MUST enable hardware checksum before enabling hardware GRO */
- if (skb_shinfo(skb)->gso_size) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return;
- }
-
/* check if hardware has done checksum */
if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
return;
struct hns3_desc *desc, u32 l234info,
u16 *vlan_tag)
{
+ struct hnae3_handle *handle = ring->tqp->handle;
struct pci_dev *pdev = ring->tqp->handle->pdev;
if (pdev->revision == 0x20) {
#define HNS3_STRP_OUTER_VLAN 0x1
#define HNS3_STRP_INNER_VLAN 0x2
+#define HNS3_STRP_BOTH 0x3
+ /* Hardware always inserts the VLAN tag into the RX descriptor when
+ * it removes the tag from the packet; the driver needs to determine
+ * which tag to report to the stack.
+ */
switch (hnae3_get_field(l234info, HNS3_RXD_STRP_TAGP_M,
HNS3_RXD_STRP_TAGP_S)) {
case HNS3_STRP_OUTER_VLAN:
+ if (handle->port_base_vlan_state !=
+ HNAE3_PORT_BASE_VLAN_DISABLE)
+ return false;
+
*vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
return true;
case HNS3_STRP_INNER_VLAN:
+ if (handle->port_base_vlan_state !=
+ HNAE3_PORT_BASE_VLAN_DISABLE)
+ return false;
+
*vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+ return true;
+ case HNS3_STRP_BOTH:
+ if (handle->port_base_vlan_state ==
+ HNAE3_PORT_BASE_VLAN_DISABLE)
+ *vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
+ else
+ *vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+
return true;
default:
return false;
return 0;
}
-static void hns3_set_gro_param(struct sk_buff *skb, u32 l234info,
- u32 bd_base_info)
+static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
+ struct sk_buff *skb, u32 l234info,
+ u32 bd_base_info)
{
u16 gro_count;
u32 l3_type;
gro_count = hnae3_get_field(l234info, HNS3_RXD_GRO_COUNT_M,
HNS3_RXD_GRO_COUNT_S);
/* if there is no HW GRO, do not set gro params */
- if (!gro_count)
- return;
+ if (!gro_count) {
+ hns3_rx_checksum(ring, skb, l234info, bd_base_info);
+ return 0;
+ }
- /* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
- * to skb_shinfo(skb)->gso_segs
- */
NAPI_GRO_CB(skb)->count = gro_count;
l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
else if (l3_type == HNS3_L3_TYPE_IPV6)
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
else
- return;
+ return -EFAULT;
skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info,
HNS3_RXD_GRO_SIZE_M,
HNS3_RXD_GRO_SIZE_S);
- if (skb_shinfo(skb)->gso_size)
- tcp_gro_complete(skb);
+
+ return hns3_gro_complete(skb);
}
static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
- struct sk_buff *skb)
+ struct sk_buff *skb, u32 rss_hash)
{
struct hnae3_handle *handle = ring->tqp->handle;
enum pkt_hash_types rss_type;
- struct hns3_desc *desc;
- int last_bd;
- /* When driver handle the rss type, ring->next_to_clean indicates the
- * first descriptor of next packet, need -1 here.
- */
- last_bd = (ring->next_to_clean - 1 + ring->desc_num) % ring->desc_num;
- desc = &ring->desc[last_bd];
-
- if (le32_to_cpu(desc->rx.rss_hash))
+ if (rss_hash)
rss_type = handle->kinfo.rss_type;
else
rss_type = PKT_HASH_TYPE_NONE;
- skb_set_hash(skb, le32_to_cpu(desc->rx.rss_hash), rss_type);
+ skb_set_hash(skb, rss_hash, rss_type);
}
-static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
- struct sk_buff **out_skb)
+/* hns3_handle_bdinfo - apply per-packet descriptor info to a received skb
+ * @ring: RX ring the packet was received on
+ * @skb: the assembled packet
+ *
+ * Reads bd_base_info and l234_info from the descriptor just before
+ * ring->next_to_clean, attaches the VLAN tag reported by
+ * hns3_parse_vlan_tag() when NETIF_F_HW_VLAN_CTAG_RX is enabled, checks
+ * the descriptor for validity and length/L2 errors, sets skb->protocol,
+ * applies GRO/checksum information, updates the ring statistics and
+ * finally sets the RSS hash on the skb.
+ *
+ * Return: 0 on success; -EINVAL when the descriptor valid bit is clear;
+ * -EFAULT on zero packet length, truncation or L2 error; otherwise the
+ * error returned by hns3_set_gro_and_checksum(). The skb is not freed
+ * here on error.
+ */
+static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
{
struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
enum hns3_pkt_l2t_type l2_frame_type;
+ u32 bd_base_info, l234info;
+ struct hns3_desc *desc;
+ unsigned int len;
+ int pre_ntc, ret;
+
+ /* bdinfo handled below is only valid on the last BD of the
+ * current packet, and ring->next_to_clean indicates the first
+ * descriptor of next packet, so need - 1 below.
+ */
+ pre_ntc = ring->next_to_clean ? (ring->next_to_clean - 1) :
+ (ring->desc_num - 1);
+ desc = &ring->desc[pre_ntc];
+ bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+ l234info = le32_to_cpu(desc->rx.l234_info);
+
+ /* Based on hw strategy, the tag offloaded will be stored at
+ * ot_vlan_tag in two layer tag case, and stored at vlan_tag
+ * in one layer tag case.
+ */
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+ u16 vlan_tag;
+
+ if (hns3_parse_vlan_tag(ring, desc, l234info, &vlan_tag))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ vlan_tag);
+ }
+
+ /* Descriptor not marked valid: count it and reject the packet */
+ if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.non_vld_descs++;
+ u64_stats_update_end(&ring->syncp);
+
+ return -EINVAL;
+ }
+
+ /* Zero length, truncated or L2-error packets are counted and rejected */
+ if (unlikely(!desc->rx.pkt_len || (l234info & (BIT(HNS3_RXD_TRUNCAT_B) |
+ BIT(HNS3_RXD_L2E_B))))) {
+ u64_stats_update_begin(&ring->syncp);
+ if (l234info & BIT(HNS3_RXD_L2E_B))
+ ring->stats.l2_err++;
+ else
+ ring->stats.err_pkt_len++;
+ u64_stats_update_end(&ring->syncp);
+
+ return -EFAULT;
+ }
+
+ /* Sample the length before eth_type_trans() runs; this value is what
+ * the byte counters below use (eth_type_trans() presumably changes
+ * skb->len - confirm).
+ */
+ len = skb->len;
+
+ /* Do update ip stack process */
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ /* This is needed in order to enable forwarding support */
+ ret = hns3_set_gro_and_checksum(ring, skb, l234info, bd_base_info);
+ if (unlikely(ret)) {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.rx_err_cnt++;
+ u64_stats_update_end(&ring->syncp);
+ return ret;
+ }
+
+ l2_frame_type = hnae3_get_field(l234info, HNS3_RXD_DMAC_M,
+ HNS3_RXD_DMAC_S);
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.rx_pkts++;
+ ring->stats.rx_bytes += len;
+
+ if (l2_frame_type == HNS3_L2_TYPE_MULTICAST)
+ ring->stats.rx_multicast++;
+
+ u64_stats_update_end(&ring->syncp);
+
+ ring->tqp_vector->rx_group.total_bytes += len;
+
+ hns3_set_rx_skb_rss_type(ring, skb, le32_to_cpu(desc->rx.rss_hash));
+ return 0;
+}
+
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
+ struct sk_buff **out_skb)
+{
struct sk_buff *skb = ring->skb;
struct hns3_desc_cb *desc_cb;
struct hns3_desc *desc;
u32 bd_base_info;
- u32 l234info;
int length;
int ret;
ALIGN(ring->pull_len, sizeof(long)));
}
- l234info = le32_to_cpu(desc->rx.l234_info);
- bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
-
- /* Based on hw strategy, the tag offloaded will be stored at
- * ot_vlan_tag in two layer tag case, and stored at vlan_tag
- * in one layer tag case.
- */
- if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
- u16 vlan_tag;
-
- if (hns3_parse_vlan_tag(ring, desc, l234info, &vlan_tag))
- __vlan_hwaccel_put_tag(skb,
- htons(ETH_P_8021Q),
- vlan_tag);
- }
-
- if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) {
- u64_stats_update_begin(&ring->syncp);
- ring->stats.non_vld_descs++;
- u64_stats_update_end(&ring->syncp);
-
+ ret = hns3_handle_bdinfo(ring, skb);
+ if (unlikely(ret)) {
dev_kfree_skb_any(skb);
- return -EINVAL;
- }
-
- if (unlikely((!desc->rx.pkt_len) ||
- (l234info & (BIT(HNS3_RXD_TRUNCAT_B) |
- BIT(HNS3_RXD_L2E_B))))) {
- u64_stats_update_begin(&ring->syncp);
- if (l234info & BIT(HNS3_RXD_L2E_B))
- ring->stats.l2_err++;
- else
- ring->stats.err_pkt_len++;
- u64_stats_update_end(&ring->syncp);
-
- dev_kfree_skb_any(skb);
- return -EFAULT;
+ return ret;
}
-
- l2_frame_type = hnae3_get_field(l234info, HNS3_RXD_DMAC_M,
- HNS3_RXD_DMAC_S);
- u64_stats_update_begin(&ring->syncp);
- if (l2_frame_type == HNS3_L2_TYPE_MULTICAST)
- ring->stats.rx_multicast++;
-
- ring->stats.rx_pkts++;
- ring->stats.rx_bytes += skb->len;
- u64_stats_update_end(&ring->syncp);
-
- ring->tqp_vector->rx_group.total_bytes += skb->len;
-
- /* This is needed in order to enable forwarding support */
- hns3_set_gro_param(skb, l234info, bd_base_info);
-
- hns3_rx_checksum(ring, skb, desc);
*out_skb = skb;
- hns3_set_rx_skb_rss_type(ring, skb);
return 0;
}
void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
{
#define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
- struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
int recv_pkts, recv_bds, clean_count, err;
- int unused_count = hns3_desc_unused(ring) - ring->pending_buf;
+ int unused_count = hns3_desc_unused(ring);
struct sk_buff *skb = ring->skb;
int num;
recv_pkts = 0, recv_bds = 0, clean_count = 0;
num -= unused_count;
+ unused_count -= ring->pending_buf;
while (recv_pkts < budget && recv_bds < num) {
/* Reuse or realloc buffers */
continue;
}
- /* Do update ip stack process */
- skb->protocol = eth_type_trans(skb, netdev);
rx_fn(ring, skb);
recv_bds += ring->pending_buf;
clean_count += ring->pending_buf;
}
devm_kfree(&pdev->dev, priv->ring_data);
+ priv->ring_data = NULL;
return ret;
}
struct hnae3_handle *h = priv->ae_handle;
int i;
+ if (!priv->ring_data)
+ return;
+
for (i = 0; i < h->kinfo.num_tqps; i++) {
devm_kfree(priv->dev, priv->ring_data[i].ring);
devm_kfree(priv->dev,
priv->ring_data[i + h->kinfo.num_tqps].ring);
}
devm_kfree(priv->dev, priv->ring_data);
+ priv->ring_data = NULL;
}
static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
handle->ae_algo->ops->client_stop(handle);
}
+/* hns3_info_show - log a summary of the NIC configuration
+ * @priv: private data of the network device
+ *
+ * Prints the MAC address, queue pair count, RSS sizes, RX buffer length,
+ * per-queue descriptor counts, number of TCs and the maximum MTU with
+ * dev_info() on priv->dev.
+ */
+static void hns3_info_show(struct hns3_nic_priv *priv)
+{
+	struct hnae3_knic_private_info *kinfo = &priv->ae_handle->kinfo;
+	struct net_device *netdev = priv->netdev;
+	struct device *dev = priv->dev;
+
+	dev_info(dev, "MAC address: %pM\n", netdev->dev_addr);
+	dev_info(dev, "Task queue pairs numbers: %d\n", kinfo->num_tqps);
+	dev_info(dev, "RSS size: %d\n", kinfo->rss_size);
+	dev_info(dev, "Allocated RSS size: %d\n", kinfo->req_rss_size);
+	dev_info(dev, "RX buffer length: %d\n", kinfo->rx_buf_len);
+	dev_info(dev, "Desc num per TX queue: %d\n", kinfo->num_tx_desc);
+	dev_info(dev, "Desc num per RX queue: %d\n", kinfo->num_rx_desc);
+	dev_info(dev, "Total number of enabled TCs: %d\n", kinfo->num_tc);
+	dev_info(dev, "Max mtu size: %d\n", netdev->max_mtu);
+}
+
static int hns3_client_init(struct hnae3_handle *handle)
{
struct pci_dev *pdev = handle->pdev;
priv->tx_timeout_count = 0;
set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+ handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL);
+
handle->kinfo.netdev = netdev;
handle->priv = (void *)priv;
set_bit(HNS3_NIC_STATE_INITED, &priv->state);
+ if (netif_msg_drv(handle))
+ hns3_info_show(priv);
+
return ret;
out_client_start:
struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret;
- hns3_client_stop(handle);
-
hns3_remove_hw_addr(netdev);
if (netdev->reg_state != NETREG_UNINITIALIZED)
unregister_netdev(netdev);
+ hns3_client_stop(handle);
+
if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
netdev_warn(netdev, "already uninitialized\n");
goto out_netdev_free;
hns3_dbg_uninit(handle);
- priv->ring_data = NULL;
-
out_netdev_free:
free_netdev(netdev);
}
if (linkup) {
netif_carrier_on(netdev);
netif_tx_wake_all_queues(netdev);
- netdev_info(netdev, "link up\n");
+ if (netif_msg_link(handle))
+ netdev_info(netdev, "link up\n");
} else {
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
- netdev_info(netdev, "link down\n");
+ if (netif_msg_link(handle))
+ netdev_info(netdev, "link down\n");
}
}
ring_ptr_move_fw(ring, next_to_use);
}
+ /* Free the pending skb in rx ring */
+ if (ring->skb) {
+ dev_kfree_skb_any(ring->skb);
+ ring->skb = NULL;
+ ring->pending_buf = 0;
+ }
+
return 0;
}
if (ret)
goto err_uninit_vector;
+ ret = hns3_client_start(handle);
+ if (ret) {
+ dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
+ goto err_uninit_ring;
+ }
+
set_bit(HNS3_NIC_STATE_INITED, &priv->state);
return ret;
+err_uninit_ring:
+ hns3_uninit_all_ring(priv);
err_uninit_vector:
hns3_nic_uninit_vector_data(priv);
- priv->ring_data = NULL;
err_dealloc_vector:
hns3_nic_dealloc_vector_data(priv);
err_put_ring:
hns3_put_ring_config(priv);
- priv->ring_data = NULL;
return ret;
}
netdev_err(netdev, "uninit ring error\n");
hns3_put_ring_config(priv);
- priv->ring_data = NULL;
return ret;
}
#define HNS3_RING_TX_RING_HEAD_REG 0x0005C
#define HNS3_RING_TX_RING_FBDNUM_REG 0x00060
#define HNS3_RING_TX_RING_OFFSET_REG 0x00064
+#define HNS3_RING_TX_RING_EBDNUM_REG 0x00068
#define HNS3_RING_TX_RING_PKTNUM_RECORD_REG 0x0006C
-
+#define HNS3_RING_TX_RING_EBD_OFFSET_REG 0x00070
+#define HNS3_RING_TX_RING_BD_ERR_REG 0x00074
#define HNS3_RING_PREFETCH_EN_REG 0x0007C
#define HNS3_RING_CFG_VF_NUM_REG 0x00080
#define HNS3_RING_ASID_REG 0x0008C
static inline int ring_space(struct hns3_enet_ring *ring)
{
- int begin = ring->next_to_clean;
- int end = ring->next_to_use;
+ /* This smp_load_acquire() pairs with smp_store_release() in
+ * hns3_nic_reclaim_one_desc called by hns3_clean_tx_ring.
+ */
+ int begin = smp_load_acquire(&ring->next_to_clean);
+ int end = READ_ONCE(ring->next_to_use);
return ((end >= begin) ? (ring->desc_num - end + begin) :
(begin - end)) - 1;
int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
int hns3_nic_reset_all_ring(struct hnae3_handle *h);
netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
+bool hns3_is_phys_func(struct pci_dev *pdev);
int hns3_clean_rx_ring(
struct hns3_enet_ring *ring, int budget,
void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *));
return h->ae_algo->ops->set_led_id(h, state);
}
+/* ethtool .get_msglevel handler: return the handle's msg_enable bits */
+static u32 hns3_get_msglevel(struct net_device *netdev)
+{
+	return hns3_get_handle(netdev)->msg_enable;
+}
+
+/* ethtool .set_msglevel handler: store @msg_level in the handle's
+ * msg_enable field without further validation.
+ */
+static void hns3_set_msglevel(struct net_device *netdev, u32 msg_level)
+{
+	hns3_get_handle(netdev)->msg_enable = msg_level;
+}
+
static const struct ethtool_ops hns3vf_ethtool_ops = {
.get_drvinfo = hns3_get_drvinfo,
.get_ringparam = hns3_get_ringparam,
.get_regs_len = hns3_get_regs_len,
.get_regs = hns3_get_regs,
.get_link = hns3_get_link,
+ .get_msglevel = hns3_get_msglevel,
+ .set_msglevel = hns3_set_msglevel,
};
static const struct ethtool_ops hns3_ethtool_ops = {
.get_regs_len = hns3_get_regs_len,
.get_regs = hns3_get_regs,
.set_phys_id = hns3_set_phys_id,
+ .get_msglevel = hns3_get_msglevel,
+ .set_msglevel = hns3_set_msglevel,
};
void hns3_ethtool_set_ops(struct net_device *netdev)
int ret;
spin_lock_bh(&hdev->hw.cmq.csq.lock);
- spin_lock_bh(&hdev->hw.cmq.crq.lock);
+ spin_lock(&hdev->hw.cmq.crq.lock);
hdev->hw.cmq.csq.next_to_clean = 0;
hdev->hw.cmq.csq.next_to_use = 0;
hclge_cmd_init_regs(&hdev->hw);
- spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+ spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
* reset may happen when lower level reset is being processed.
*/
if ((hclge_is_reset_pending(hdev))) {
- set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
- return -EBUSY;
+ ret = -EBUSY;
+ goto err_cmd_init;
}
ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
if (ret) {
dev_err(&hdev->pdev->dev,
"firmware version query failed %d\n", ret);
- return ret;
+ goto err_cmd_init;
}
hdev->fw_version = version;
dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
return 0;
+
+err_cmd_init:
+ set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+
+ return ret;
}
static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
HCLGE_OPC_QUERY_LINK_STATUS = 0x0307,
HCLGE_OPC_CONFIG_MAX_FRM_SIZE = 0x0308,
HCLGE_OPC_CONFIG_SPEED_DUP = 0x0309,
+ HCLGE_OPC_QUERY_MAC_TNL_INT = 0x0310,
+ HCLGE_OPC_MAC_TNL_INT_EN = 0x0311,
+ HCLGE_OPC_CLEAR_MAC_TNL_INT = 0x0312,
HCLGE_OPC_SERDES_LOOPBACK = 0x0315,
/* PFC/Pause commands */
/* Led command */
HCLGE_OPC_LED_STATUS_CFG = 0xB000,
+ /* NCL config command */
+ HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011,
+
/* SFP command */
HCLGE_OPC_SFP_GET_SPEED = 0x7104,
}
}
+/* hclge_dbg_dump_rst_info: print the reset counters kept in hdev->rst_stats
+ * @hdev: pointer to struct hclge_dev
+ */
+static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+
+	dev_info(dev, "PF reset count: %d\n", hdev->rst_stats.pf_rst_cnt);
+	dev_info(dev, "FLR reset count: %d\n", hdev->rst_stats.flr_rst_cnt);
+	dev_info(dev, "CORE reset count: %d\n", hdev->rst_stats.core_rst_cnt);
+	dev_info(dev, "GLOBAL reset count: %d\n",
+		 hdev->rst_stats.global_rst_cnt);
+	dev_info(dev, "IMP reset count: %d\n", hdev->rst_stats.imp_rst_cnt);
+	dev_info(dev, "reset done count: %d\n",
+		 hdev->rst_stats.reset_done_cnt);
+	dev_info(dev, "HW reset done count: %d\n",
+		 hdev->rst_stats.hw_reset_done_cnt);
+	dev_info(dev, "reset count: %d\n", hdev->rst_stats.reset_cnt);
+}
+
+/* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file
+ * @hdev: pointer to struct hclge_dev
+ * @cmd_buf: string that contains offset and length (both parsed as hex)
+ *
+ * The requested range is fetched in chunks of at most
+ * HCLGE_MAX_NCL_CONFIG_LENGTH bytes, one HCLGE_OPC_QUERY_NCL_CONFIG command
+ * per chunk, and printed one 32-bit word per line. If the arguments cannot
+ * be parsed or are out of range an error is logged and nothing is dumped;
+ * the dump stops silently if a command fails.
+ */
+static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, char *cmd_buf)
+{
+#define HCLGE_MAX_NCL_CONFIG_OFFSET	4096
+#define HCLGE_MAX_NCL_CONFIG_LENGTH	(20 + 24 * 4)
+#define HCLGE_CMD_DATA_NUM		6
+
+	struct hclge_desc desc[5];
+	u32 byte_offset;
+	int bd_num = 5;
+	int offset;
+	int length;
+	int data0;
+	int ret;
+	int i;
+	int j;
+
+	ret = sscanf(cmd_buf, "%x %x", &offset, &length);
+	if (ret != 2) {
+		dev_err(&hdev->pdev->dev, "Invalid offset or length.\n");
+		return;
+	}
+
+	/* Reject negative values first, so that the subtraction in the range
+	 * check below is only evaluated for a non-negative offset and cannot
+	 * overflow.
+	 */
+	if (offset < 0 || length <= 0) {
+		dev_err(&hdev->pdev->dev, "Non-positive offset or length.\n");
+		return;
+	}
+
+	if (offset >= HCLGE_MAX_NCL_CONFIG_OFFSET ||
+	    length > HCLGE_MAX_NCL_CONFIG_OFFSET - offset) {
+		dev_err(&hdev->pdev->dev, "Invalid offset or length.\n");
+		return;
+	}
+
+	dev_info(&hdev->pdev->dev, "offset |    data\n");
+
+	while (length > 0) {
+		/* Request word: offset in the low bits, chunk length from
+		 * bit 16 upwards.
+		 */
+		data0 = offset;
+		if (length >= HCLGE_MAX_NCL_CONFIG_LENGTH)
+			data0 |= HCLGE_MAX_NCL_CONFIG_LENGTH << 16;
+		else
+			data0 |= length << 16;
+		ret = hclge_dbg_cmd_send(hdev, desc, data0, bd_num,
+					 HCLGE_OPC_QUERY_NCL_CONFIG);
+		if (ret)
+			return;
+
+		byte_offset = offset;
+		for (i = 0; i < bd_num; i++) {
+			for (j = 0; j < HCLGE_CMD_DATA_NUM; j++) {
+				/* The first word of the first descriptor is
+				 * not part of the dumped data.
+				 */
+				if (i == 0 && j == 0)
+					continue;
+
+				dev_info(&hdev->pdev->dev, "0x%04x | 0x%08x\n",
+					 byte_offset,
+					 le32_to_cpu(desc[i].data[j]));
+				byte_offset += sizeof(u32);
+				length -= sizeof(u32);
+				if (length <= 0)
+					return;
+			}
+		}
+		offset += HCLGE_MAX_NCL_CONFIG_LENGTH;
+	}
+}
+
+/* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt
+ * @hdev: pointer to struct hclge_dev
+ *
+ * Takes entries out of hdev->mac_tnl_log with kfifo_get() until none are
+ * left and prints each one as "[seconds.microseconds]status = 0x...".
+ */
+static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev)
+{
+#define HCLGE_BILLION_NANO_SECONDS 1000000000
+
+	struct hclge_mac_tnl_stats stats;
+	unsigned long rem_nsec;
+
+	dev_info(&hdev->pdev->dev, "Recently generated mac tnl interruption:\n");
+
+	while (kfifo_get(&hdev->mac_tnl_log, &stats)) {
+		/* do_div() leaves the quotient (whole seconds) in stats.time
+		 * and returns the remainder in nanoseconds.
+		 */
+		rem_nsec = do_div(stats.time, HCLGE_BILLION_NANO_SECONDS);
+		/* rem_nsec / 1000 is in microseconds (0..999999), so it has
+		 * to be zero-padded to six digits to read as a fraction of a
+		 * second.
+		 */
+		dev_info(&hdev->pdev->dev, "[%07lu.%06lu]status = 0x%x\n",
+			 (unsigned long)stats.time, rem_nsec / 1000,
+			 stats.status);
+	}
+}
+
int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf)
{
struct hclge_vport *vport = hclge_get_vport(handle);
hclge_dbg_dump_mng_table(hdev);
} else if (strncmp(cmd_buf, "dump reg", 8) == 0) {
hclge_dbg_dump_reg_cmd(hdev, cmd_buf);
+ } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
+ hclge_dbg_dump_rst_info(hdev);
+ } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
+ hclge_dbg_dump_ncl_config(hdev,
+ &cmd_buf[sizeof("dump ncl_config")]);
+ } else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
+ hclge_dbg_dump_mac_tnl_status(hdev);
} else {
dev_info(&hdev->pdev->dev, "unknown command\n");
return -EINVAL;
#include "hclge_err.h"
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
- { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
- { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
- { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
- { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
- { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
- { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
- { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
- { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" },
- { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" },
+ { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_igu_int[] = {
- { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" },
+ { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
- { .int_msk = BIT(0), .msg = "rx_buf_overflow" },
- { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" },
- { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" },
- { .int_msk = BIT(3), .msg = "tx_buf_overflow" },
- { .int_msk = BIT(4), .msg = "tx_buf_underrun" },
- { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow" },
+ { .int_msk = BIT(0), .msg = "rx_buf_overflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(3), .msg = "tx_buf_overflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(4), .msg = "tx_buf_underrun",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
+ .reset_level = HNAE3_CORE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ncsi_err_int[] = {
- { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
- { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" },
- { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err" },
- { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err" },
- { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err" },
- { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err" },
- { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err" },
- { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err" },
- { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err" },
- { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err" },
- { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err" },
- { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err" },
- { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err" },
- { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err" },
- { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err" },
- { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err" },
- { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err" },
- { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err" },
- { .int_msk = BIT(27),
- .msg = "flow_director_ad_mem0_ecc_mbit_err" },
- { .int_msk = BIT(28),
- .msg = "flow_director_ad_mem1_ecc_mbit_err" },
- { .int_msk = BIT(29),
- .msg = "rx_vlan_tag_memory_ecc_mbit_err" },
- { .int_msk = BIT(30),
- .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err" },
+ { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
- { .int_msk = BIT(0), .msg = "tx_vlan_tag_err" },
- { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err" },
+ { .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
- { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" },
- { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err" },
- { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err" },
+ { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_tm_sch_rint[] = {
- { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err" },
- { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err" },
- { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err" },
- { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err" },
- { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err" },
- { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err" },
- { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err" },
- { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err" },
- { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err" },
- { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err" },
- { .int_msk = BIT(12),
- .msg = "tm_sch_port_shap_offset_fifo_wr_err" },
- { .int_msk = BIT(13),
- .msg = "tm_sch_port_shap_offset_fifo_rd_err" },
- { .int_msk = BIT(14),
- .msg = "tm_sch_pg_pshap_offset_fifo_wr_err" },
- { .int_msk = BIT(15),
- .msg = "tm_sch_pg_pshap_offset_fifo_rd_err" },
- { .int_msk = BIT(16),
- .msg = "tm_sch_pg_cshap_offset_fifo_wr_err" },
- { .int_msk = BIT(17),
- .msg = "tm_sch_pg_cshap_offset_fifo_rd_err" },
- { .int_msk = BIT(18),
- .msg = "tm_sch_pri_pshap_offset_fifo_wr_err" },
- { .int_msk = BIT(19),
- .msg = "tm_sch_pri_pshap_offset_fifo_rd_err" },
- { .int_msk = BIT(20),
- .msg = "tm_sch_pri_cshap_offset_fifo_wr_err" },
- { .int_msk = BIT(21),
- .msg = "tm_sch_pri_cshap_offset_fifo_rd_err" },
- { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err" },
- { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err" },
- { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err" },
- { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err" },
- { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err" },
- { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err" },
- { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err" },
- { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err" },
- { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err" },
- { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err" },
+ { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
- { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err" },
- { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err" },
- { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err" },
- { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err" },
- { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err" },
- { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err" },
- { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err" },
- { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err" },
- { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err" },
- { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err" },
- { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err" },
- { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err" },
- { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err" },
- { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err" },
- { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err" },
- { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err" },
- { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err" },
- { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err" },
+ { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
- { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err" },
- { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
- { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err" },
- { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err" },
- { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err" },
- { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err" },
- { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err" },
- { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err" },
- { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err" },
- { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err" },
- { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err" },
- { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err" },
- { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err" },
- { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err" },
- { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err" },
+ { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
- { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err" },
- { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err" },
- { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err" },
- { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err" },
- { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err" },
- { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err" },
- { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err" },
- { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err" },
- { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err" },
- { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err" },
- { .int_msk = BIT(26), .msg = "rd_bus_err" },
- { .int_msk = BIT(27), .msg = "wr_bus_err" },
- { .int_msk = BIT(28), .msg = "reg_search_miss" },
- { .int_msk = BIT(29), .msg = "rx_q_search_miss" },
- { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect" },
- { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl" },
+ { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(26), .msg = "rd_bus_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(27), .msg = "wr_bus_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(28), .msg = "reg_search_miss",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(29), .msg = "rx_q_search_miss",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
- { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err" },
- { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err" },
+ { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
- { .int_msk = BIT(0), .msg = "over_8bd_no_fe" },
- { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err" },
- { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err" },
- { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison" },
- { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison" },
- { .int_msk = BIT(5), .msg = "buf_wait_timeout" },
+ { .int_msk = BIT(0), .msg = "over_8bd_no_fe",
+ .reset_level = HNAE3_FUNC_RESET },
+ { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison",
+ .reset_level = HNAE3_FUNC_RESET },
+ { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison",
+ .reset_level = HNAE3_FUNC_RESET },
+ { .int_msk = BIT(5), .msg = "buf_wait_timeout",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
- { .int_msk = BIT(0), .msg = "buf_sum_err" },
- { .int_msk = BIT(1), .msg = "ppp_mb_num_err" },
- { .int_msk = BIT(2), .msg = "ppp_mbid_err" },
- { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err" },
- { .int_msk = BIT(4), .msg = "ppp_rlt_host_err" },
- { .int_msk = BIT(5), .msg = "cks_edit_position_err" },
- { .int_msk = BIT(6), .msg = "cks_edit_condition_err" },
- { .int_msk = BIT(7), .msg = "vlan_edit_condition_err" },
- { .int_msk = BIT(8), .msg = "vlan_num_ot_err" },
- { .int_msk = BIT(9), .msg = "vlan_num_in_err" },
+ { .int_msk = BIT(0), .msg = "buf_sum_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(1), .msg = "ppp_mb_num_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(2), .msg = "ppp_mbid_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "cks_edit_position_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "cks_edit_condition_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "vlan_num_ot_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "vlan_num_in_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
#define HCLGE_SSU_MEM_ECC_ERR(x) \
- { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err" }
+ { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
+ .reset_level = HNAE3_GLOBAL_RESET }
static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
HCLGE_SSU_MEM_ECC_ERR(0),
};
static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
- { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" },
- { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port" },
- { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port" },
- { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port" },
- { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port" },
- { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port" },
- { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port" },
- { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port" },
- { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port" },
- { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port" },
- { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port" },
- { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port" },
- { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port" },
+ { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
- { .int_msk = BIT(0), .msg = "ig_mac_inf_int" },
- { .int_msk = BIT(1), .msg = "ig_host_inf_int" },
- { .int_msk = BIT(2), .msg = "ig_roc_buf_int" },
- { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int" },
- { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int" },
- { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int" },
- { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int" },
- { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int" },
- { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int" },
- { .int_msk = BIT(9), .msg = "qm_eof_fifo_int" },
- { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int" },
- { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int" },
- { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int" },
- { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int" },
- { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int" },
- { .int_msk = BIT(15), .msg = "host_cmd_fifo_int" },
- { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int" },
- { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int" },
- { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int" },
- { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int" },
- { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int" },
- { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int" },
- { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int" },
- { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int" },
+ { .int_msk = BIT(0), .msg = "ig_mac_inf_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "ig_host_inf_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "ig_roc_buf_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "qm_eof_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "host_cmd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
- { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg" },
- { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg" },
- { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg" },
- { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg" },
+ { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
- { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" },
- { .int_msk = BIT(9), .msg = "low_water_line_err_port" },
- { .int_msk = BIT(10), .msg = "hi_water_line_err_port" },
+ { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "low_water_line_err_port",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(10), .msg = "hi_water_line_err_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
{ /* sentinel */ }
};
-static void hclge_log_error(struct device *dev, char *reg,
- const struct hclge_hw_error *err,
- u32 err_sts)
+static enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg,
+ const struct hclge_hw_error *err,
+ u32 err_sts)
{
+ enum hnae3_reset_type reset_level = HNAE3_FUNC_RESET;
+ bool need_reset = false;
+
while (err->msg) {
- if (err->int_msk & err_sts)
+ if (err->int_msk & err_sts) {
dev_warn(dev, "%s %s found [error status=0x%x]\n",
reg, err->msg, err_sts);
+ if (err->reset_level != HNAE3_NONE_RESET &&
+ err->reset_level >= reset_level) {
+ reset_level = err->reset_level;
+ need_reset = true;
+ }
+ }
err++;
}
+ if (need_reset)
+ return reset_level;
+ else
+ return HNAE3_NONE_RESET;
}
/* hclge_cmd_query_error: read the error information
return ret;
}
+static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev)
+{
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false);
+ desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
{
struct device *dev = &hdev->pdev->dev;
return ret;
}
+int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
+{
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
+ if (en)
+ desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN);
+ else
+ desc.data[0] = 0;
+
+ desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
bool en)
{
int num)
{
struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+ enum hnae3_reset_type reset_level;
struct device *dev = &hdev->pdev->dev;
__le32 *desc_data;
u32 status;
/* log HNS common errors */
status = le32_to_cpu(desc[0].data[0]);
if (status) {
- hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
- &hclge_imp_tcm_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+ &hclge_imp_tcm_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[1]);
if (status) {
- hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
- &hclge_cmdq_nic_mem_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+ &hclge_cmdq_nic_mem_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) {
dev_warn(dev, "imp_rd_data_poison_err found\n");
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_NONE_RESET);
}
status = le32_to_cpu(desc[0].data[3]);
if (status) {
- hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
- &hclge_tqp_int_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+ &hclge_tqp_int_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[4]);
if (status) {
- hclge_log_error(dev, "MSIX_ECC_INT_STS",
- &hclge_msix_sram_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "MSIX_ECC_INT_STS",
+ &hclge_msix_sram_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log SSU(Storage Switch Unit) errors */
desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*(desc_data + 2));
if (status) {
- hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
- &hclge_ssu_mem_ecc_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
+ &hclge_ssu_mem_ecc_err_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
if (status) {
dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
}
status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
if (status) {
- hclge_log_error(dev, "SSU_COMMON_ERR_INT",
- &hclge_ssu_com_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_COMMON_ERR_INT",
+ &hclge_ssu_com_err_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log IGU(Ingress Unit) errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
- if (status)
- hclge_log_error(dev, "IGU_INT_STS",
- &hclge_igu_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "IGU_INT_STS",
+ &hclge_igu_int[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* log PPP(Programmable Packet Process) errors */
desc_data = (__le32 *)&desc[4];
status = le32_to_cpu(*(desc_data + 1));
- if (status)
- hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
- &hclge_ppp_mpf_abnormal_int_st1[0], status);
+ if (status) {
+ reset_level =
+ hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+ &hclge_ppp_mpf_abnormal_int_st1[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
- if (status)
- hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
- &hclge_ppp_mpf_abnormal_int_st3[0], status);
+ if (status) {
+ reset_level =
+ hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+ &hclge_ppp_mpf_abnormal_int_st3[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[5];
if (status) {
dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST1 %s found\n",
"rpu_rx_pkt_ecc_mbit_err");
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
}
status = le32_to_cpu(*(desc_data + 2));
if (status) {
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
- &hclge_ppu_mpf_abnormal_int_st2[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level =
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+ &hclge_ppu_mpf_abnormal_int_st2[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
if (status) {
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
- &hclge_ppu_mpf_abnormal_int_st3[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level =
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
+ &hclge_ppu_mpf_abnormal_int_st3[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log TM(Traffic Manager) errors */
desc_data = (__le32 *)&desc[6];
status = le32_to_cpu(*desc_data);
if (status) {
- hclge_log_error(dev, "TM_SCH_RINT",
- &hclge_tm_sch_rint[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "TM_SCH_RINT",
+ &hclge_tm_sch_rint[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log QCN(Quantized Congestion Control) errors */
desc_data = (__le32 *)&desc[7];
status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
if (status) {
- hclge_log_error(dev, "QCN_FIFO_RINT",
- &hclge_qcn_fifo_rint[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "QCN_FIFO_RINT",
+ &hclge_qcn_fifo_rint[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
if (status) {
- hclge_log_error(dev, "QCN_ECC_RINT",
- &hclge_qcn_ecc_rint[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "QCN_ECC_RINT",
+ &hclge_qcn_ecc_rint[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log NCSI errors */
desc_data = (__le32 *)&desc[9];
status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
if (status) {
- hclge_log_error(dev, "NCSI_ECC_INT_RPT",
- &hclge_ncsi_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+ &hclge_ncsi_err_int[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* clear all main PF RAS errors */
{
struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
struct device *dev = &hdev->pdev->dev;
+ enum hnae3_reset_type reset_level;
__le32 *desc_data;
u32 status;
int ret;
/* log SSU(Storage Switch Unit) errors */
status = le32_to_cpu(desc[0].data[0]);
if (status) {
- hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
- &hclge_ssu_port_based_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+ &hclge_ssu_port_based_err_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[1]);
if (status) {
- hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
- &hclge_ssu_fifo_overflow_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
+ &hclge_ssu_fifo_overflow_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[2]);
if (status) {
- hclge_log_error(dev, "SSU_ETS_TCG_INT",
- &hclge_ssu_ets_tcg_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_ETS_TCG_INT",
+ &hclge_ssu_ets_tcg_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
- if (status)
- hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
- &hclge_igu_egu_tnl_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+ &hclge_igu_egu_tnl_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
- if (status)
- hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
- &hclge_ppu_pf_abnormal_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
+ &hclge_ppu_pf_abnormal_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* clear all PF RAS errors */
hclge_cmd_reuse_desc(&desc[0], false);
int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
unsigned long *reset_requests)
{
+ struct hclge_mac_tnl_stats mac_tnl_stats;
struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
+ enum hnae3_reset_type reset_level;
struct hclge_desc desc_bd;
struct hclge_desc *desc;
__le32 *desc_data;
- int ret = 0;
u32 status;
-
- /* set default handling */
- set_bit(HNAE3_FUNC_RESET, reset_requests);
+ int ret;
/* query the number of bds for the MSIx int status */
hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_MSIX_INT_STS_BD_NUM,
if (ret) {
dev_err(dev, "fail(%d) to query msix int status bd num\n",
ret);
- /* reset everything for now */
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
return ret;
}
if (ret) {
dev_err(dev, "query all mpf msix int cmd failed (%d)\n",
ret);
- /* reset everything for now */
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
goto msi_error;
}
desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data);
if (status) {
- hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
- &hclge_mac_afifo_tnl_int[0], status);
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+ reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+ &hclge_mac_afifo_tnl_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
}
/* log PPU(RCB) MPF errors */
status = le32_to_cpu(*(desc_data + 2)) &
HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
if (status) {
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
- &hclge_ppu_mpf_abnormal_int_st2[0], status);
- set_bit(HNAE3_CORE_RESET, reset_requests);
+ reset_level =
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+ &hclge_ppu_mpf_abnormal_int_st2[0],
+ status);
+ set_bit(reset_level, reset_requests);
}
/* clear all main PF MSIx errors */
if (ret) {
dev_err(dev, "clear all mpf msix int cmd failed (%d)\n",
ret);
- /* reset everything for now */
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
goto msi_error;
}
if (ret) {
dev_err(dev, "query all pf msix int cmd failed (%d)\n",
ret);
- /* reset everything for now */
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
goto msi_error;
}
/* log SSU PF errors */
status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
if (status) {
- hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
- &hclge_ssu_port_based_pf_int[0], status);
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+ reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+ &hclge_ssu_port_based_pf_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
}
/* read and log PPP PF errors */
desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*desc_data);
- if (status)
- hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
- &hclge_ppp_pf_abnormal_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
+ &hclge_ppp_pf_abnormal_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
+ }
/* log PPU(RCB) PF errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
- if (status)
- hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
- &hclge_ppu_pf_abnormal_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
+ &hclge_ppu_pf_abnormal_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
+ }
/* clear all PF MSIx errors */
hclge_cmd_reuse_desc(&desc[0], false);
if (ret) {
dev_err(dev, "clear all pf msix int cmd failed (%d)\n",
ret);
- /* reset everything for now */
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+ }
+
+ /* query and clear mac tnl interrupts */
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
+ true);
+ ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
+ if (ret) {
+ dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
+ goto msi_error;
+ }
+
+ status = le32_to_cpu(desc->data[0]);
+ if (status) {
+ /* When a mac tnl interrupt occurs, record the current time
+ * and register status in a fifo, then clear the status, so
+ * that if the link status changes unexpectedly, the records
+ * can be queried through debugfs.
+ */
+ mac_tnl_stats.time = local_clock();
+ mac_tnl_stats.status = status;
+ kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
+ ret = hclge_clear_mac_tnl_int(hdev);
+ if (ret)
+ dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
+ set_bit(HNAE3_NONE_RESET, reset_requests);
}
msi_error:
#define HCLGE_NCSI_ERR_INT_TYPE 0x9
#define HCLGE_MAC_COMMON_ERR_INT_EN 0x107FF
#define HCLGE_MAC_COMMON_ERR_INT_EN_MASK 0x107FF
+#define HCLGE_MAC_TNL_INT_EN GENMASK(7, 0)
+#define HCLGE_MAC_TNL_INT_EN_MASK GENMASK(7, 0)
+#define HCLGE_MAC_TNL_INT_CLR GENMASK(7, 0)
#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN GENMASK(31, 0)
#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK GENMASK(31, 0)
#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN GENMASK(31, 0)
struct hclge_hw_error {
u32 int_msk;
const char *msg;
+ enum hnae3_reset_type reset_level;
};
+int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state);
pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
static int hclge_init_vlan_config(struct hclge_dev *hdev);
static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
+static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
u16 *allocated_size, bool is_alloc);
p = hclge_tqps_get_stats(handle, p);
}
+static void hclge_get_mac_pause_stat(struct hnae3_handle *handle, u64 *tx_cnt,
+ u64 *rx_cnt)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ *tx_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num;
+ *rx_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num;
+}
+
static int hclge_parse_func_status(struct hclge_dev *hdev,
struct hclge_func_status_cmd *status)
{
vport->back = hdev;
vport->vport_id = i;
vport->mps = HCLGE_MAC_DEFAULT_FRAME;
+ vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
+ vport->rxvlan_cfg.rx_vlan_offload_en = true;
INIT_LIST_HEAD(&vport->vlan_list);
INIT_LIST_HEAD(&vport->uc_mac_list);
INIT_LIST_HEAD(&vport->mc_mac_list);
return ret;
}
-static int hclge_get_tc_num(struct hclge_dev *hdev)
+static u32 hclge_get_tc_num(struct hclge_dev *hdev)
{
int i, cnt = 0;
return cnt;
}
-static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
-{
- int i, cnt = 0;
-
- for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
- if (hdev->hw_tc_map & BIT(i) &&
- hdev->tm_info.hw_pfc_map & BIT(i))
- cnt++;
- return cnt;
-}
-
/* Get the number of pfc enabled TCs, which have private buffer */
static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
struct hclge_pkt_buf_alloc *buf_alloc)
struct hclge_pkt_buf_alloc *buf_alloc,
u32 rx_all)
{
- u32 shared_buf_min, shared_buf_tc, shared_std;
- int tc_num, pfc_enable_num;
+ u32 shared_buf_min, shared_buf_tc, shared_std, hi_thrd, lo_thrd;
+ u32 tc_num = hclge_get_tc_num(hdev);
u32 shared_buf, aligned_mps;
u32 rx_priv;
int i;
- tc_num = hclge_get_tc_num(hdev);
- pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
aligned_mps = roundup(hdev->mps, HCLGE_BUF_SIZE_UNIT);
if (hnae3_dev_dcb_supported(hdev))
shared_buf_min = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF
+ hdev->dv_buf_size;
- shared_buf_tc = pfc_enable_num * aligned_mps +
- (tc_num - pfc_enable_num) * aligned_mps / 2 +
- aligned_mps;
+ shared_buf_tc = tc_num * aligned_mps + aligned_mps;
shared_std = roundup(max_t(u32, shared_buf_min, shared_buf_tc),
HCLGE_BUF_SIZE_UNIT);
} else {
buf_alloc->s_buf.self.high = aligned_mps +
HCLGE_NON_DCB_ADDITIONAL_BUF;
- buf_alloc->s_buf.self.low =
- roundup(aligned_mps / 2, HCLGE_BUF_SIZE_UNIT);
+ buf_alloc->s_buf.self.low = aligned_mps;
+ }
+
+ if (hnae3_dev_dcb_supported(hdev)) {
+ if (tc_num)
+ hi_thrd = (shared_buf - hdev->dv_buf_size) / tc_num;
+ else
+ hi_thrd = shared_buf - hdev->dv_buf_size;
+
+ hi_thrd = max_t(u32, hi_thrd, 2 * aligned_mps);
+ hi_thrd = rounddown(hi_thrd, HCLGE_BUF_SIZE_UNIT);
+ lo_thrd = hi_thrd - aligned_mps / 2;
+ } else {
+ hi_thrd = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF;
+ lo_thrd = aligned_mps;
}
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- if ((hdev->hw_tc_map & BIT(i)) &&
- (hdev->tm_info.hw_pfc_map & BIT(i))) {
- buf_alloc->s_buf.tc_thrd[i].low = aligned_mps;
- buf_alloc->s_buf.tc_thrd[i].high = 2 * aligned_mps;
- } else {
- buf_alloc->s_buf.tc_thrd[i].low = 0;
- buf_alloc->s_buf.tc_thrd[i].high = aligned_mps;
- }
+ buf_alloc->s_buf.tc_thrd[i].low = lo_thrd;
+ buf_alloc->s_buf.tc_thrd[i].high = hi_thrd;
}
return true;
static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
{
- if (!test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
+ !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
schedule_work(&hdev->mbx_service_task);
}
for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
handle = &hdev->vport[i].nic;
client->ops->link_status_change(handle, state);
+ hclge_config_mac_tnl_int(hdev, state);
rhandle = &hdev->vport[i].roce;
if (rclient && rclient->ops->link_status_change)
rclient->ops->link_status_change(rhandle,
set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
*clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
+ hdev->rst_stats.imp_rst_cnt++;
return HCLGE_VECTOR0_EVENT_RST;
}
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
*clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
+ hdev->rst_stats.global_rst_cnt++;
return HCLGE_VECTOR0_EVENT_RST;
}
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
set_bit(HNAE3_CORE_RESET, &hdev->reset_pending);
*clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
+ hdev->rst_stats.core_rst_cnt++;
return HCLGE_VECTOR0_EVENT_RST;
}
/* check for vector0 msix event source */
- if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK)
+ if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
+ dev_dbg(&hdev->pdev->dev, "received event 0x%x\n",
+ msix_src_reg);
return HCLGE_VECTOR0_EVENT_ERR;
+ }
/* check for vector0 mailbox(=CMDQ RX) event source */
if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
return HCLGE_VECTOR0_EVENT_MBX;
}
+ /* print other vector0 event source */
+ dev_dbg(&hdev->pdev->dev, "cmdq_src_reg:0x%x, msix_src_reg:0x%x\n",
+ cmdq_src_reg, msix_src_reg);
return HCLGE_VECTOR0_EVENT_OTHER;
}
return ret;
}
- if (!reset)
+ if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
continue;
/* Inform VF to process the reset.
static void hclge_do_reset(struct hclge_dev *hdev)
{
+ struct hnae3_handle *handle = &hdev->vport[0].nic;
struct pci_dev *pdev = hdev->pdev;
u32 val;
+ if (hclge_get_hw_reset_stat(handle)) {
+ dev_info(&pdev->dev, "Hardware reset not finish\n");
+ dev_info(&pdev->dev, "func_rst_reg:0x%x, global_rst_reg:0x%x\n",
+ hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING),
+ hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG));
+ return;
+ }
+
switch (hdev->reset_type) {
case HNAE3_GLOBAL_RESET:
val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
clear_bit(HNAE3_FLR_RESET, addr);
}
+ if (hdev->reset_type != HNAE3_NONE_RESET &&
+ rst_level < hdev->reset_type)
+ return HNAE3_NONE_RESET;
+
return rst_level;
}
* after hclge_cmd_init is called.
*/
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+ hdev->rst_stats.pf_rst_cnt++;
break;
case HNAE3_FLR_RESET:
/* There is no mechanism for PF to know if VF has stopped IO
msleep(100);
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
+ hdev->rst_stats.flr_rst_cnt++;
break;
case HNAE3_IMP_RESET:
reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
* know if device is undergoing reset
*/
ae_dev->reset_type = hdev->reset_type;
- hdev->reset_count++;
+ hdev->rst_stats.reset_cnt++;
/* perform reset of the stack & ae device for a client */
ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
if (ret)
goto err_reset;
}
+ hdev->rst_stats.hw_reset_done_cnt++;
+
ret = hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT);
if (ret)
goto err_reset;
hdev->last_reset_time = jiffies;
hdev->reset_fail_cnt = 0;
+ hdev->rst_stats.reset_done_cnt++;
ae_dev->reset_type = HNAE3_NONE_RESET;
+ del_timer(&hdev->reset_timer);
return;
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- return hdev->reset_count;
+ return hdev->rst_stats.hw_reset_done_cnt;
}
static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
#define HCLGE_SERDES_RETRY_MS 10
#define HCLGE_SERDES_RETRY_NUM 100
-#define HCLGE_MAC_LINK_STATUS_MS 20
-#define HCLGE_MAC_LINK_STATUS_NUM 10
+#define HCLGE_MAC_LINK_STATUS_MS 10
+#define HCLGE_MAC_LINK_STATUS_NUM 100
#define HCLGE_MAC_LINK_STATUS_DOWN 0
#define HCLGE_MAC_LINK_STATUS_UP 1
return ret;
}
-int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
- u16 vlan_id, bool is_kill)
-{
- struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
-
- return hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, vlan_id,
- 0, is_kill);
-}
-
-static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
- u16 vlan, u8 qos, __be16 proto)
-{
- struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
-
- if ((vfid >= hdev->num_alloc_vfs) || (vlan > 4095) || (qos > 7))
- return -EINVAL;
- if (proto != htons(ETH_P_8021Q))
- return -EPROTONOSUPPORT;
-
- return hclge_set_vlan_filter_hw(hdev, proto, vfid, vlan, qos, false);
-}
-
static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
{
struct hclge_tx_vtag_cfg *vcfg = &vport->txvlan_cfg;
return status;
}
+static int hclge_vlan_offload_cfg(struct hclge_vport *vport,
+ u16 port_base_vlan_state,
+ u16 vlan_tag)
+{
+ int ret;
+
+ if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ vport->txvlan_cfg.accept_tag1 = true;
+ vport->txvlan_cfg.insert_tag1_en = false;
+ vport->txvlan_cfg.default_tag1 = 0;
+ } else {
+ vport->txvlan_cfg.accept_tag1 = false;
+ vport->txvlan_cfg.insert_tag1_en = true;
+ vport->txvlan_cfg.default_tag1 = vlan_tag;
+ }
+
+ vport->txvlan_cfg.accept_untag1 = true;
+
+ /* accept_tag2 and accept_untag2 are not supported on
+ * pdev revision(0x20); newer revisions support them.
+ * These two fields cannot be configured by the user.
+ */
+ vport->txvlan_cfg.accept_tag2 = true;
+ vport->txvlan_cfg.accept_untag2 = true;
+ vport->txvlan_cfg.insert_tag2_en = false;
+ vport->txvlan_cfg.default_tag2 = 0;
+
+ if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ vport->rxvlan_cfg.strip_tag1_en = false;
+ vport->rxvlan_cfg.strip_tag2_en =
+ vport->rxvlan_cfg.rx_vlan_offload_en;
+ } else {
+ vport->rxvlan_cfg.strip_tag1_en =
+ vport->rxvlan_cfg.rx_vlan_offload_en;
+ vport->rxvlan_cfg.strip_tag2_en = true;
+ }
+ vport->rxvlan_cfg.vlan1_vlan_prionly = false;
+ vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+
+ ret = hclge_set_vlan_tx_offload_cfg(vport);
+ if (ret)
+ return ret;
+
+ return hclge_set_vlan_rx_offload_cfg(vport);
+}
+
static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev)
{
struct hclge_rx_vlan_type_cfg_cmd *rx_req;
return ret;
for (i = 0; i < hdev->num_alloc_vport; i++) {
- vport = &hdev->vport[i];
- vport->txvlan_cfg.accept_tag1 = true;
- vport->txvlan_cfg.accept_untag1 = true;
+ u16 vlan_tag;
- /* accept_tag2 and accept_untag2 are not supported on
- * pdev revision(0x20), new revision support them. The
- * value of this two fields will not return error when driver
- * send command to fireware in revision(0x20).
- * This two fields can not configured by user.
- */
- vport->txvlan_cfg.accept_tag2 = true;
- vport->txvlan_cfg.accept_untag2 = true;
-
- vport->txvlan_cfg.insert_tag1_en = false;
- vport->txvlan_cfg.insert_tag2_en = false;
- vport->txvlan_cfg.default_tag1 = 0;
- vport->txvlan_cfg.default_tag2 = 0;
-
- ret = hclge_set_vlan_tx_offload_cfg(vport);
- if (ret)
- return ret;
-
- vport->rxvlan_cfg.strip_tag1_en = false;
- vport->rxvlan_cfg.strip_tag2_en = true;
- vport->rxvlan_cfg.vlan1_vlan_prionly = false;
- vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+ vport = &hdev->vport[i];
+ vlan_tag = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
- ret = hclge_set_vlan_rx_offload_cfg(vport);
+ ret = hclge_vlan_offload_cfg(vport,
+ vport->port_base_vlan_cfg.state,
+ vlan_tag);
if (ret)
return ret;
}
return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
}
-void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id)
+static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ bool writen_to_tbl)
{
struct hclge_vport_vlan_cfg *vlan;
if (!vlan)
return;
- vlan->hd_tbl_status = true;
+ vlan->hd_tbl_status = writen_to_tbl;
vlan->vlan_id = vlan_id;
list_add_tail(&vlan->node, &vport->vlan_list);
}
-void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
- bool is_write_tbl)
+static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport)
+{
+ struct hclge_vport_vlan_cfg *vlan, *tmp;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+ if (!vlan->hd_tbl_status) {
+ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+ vport->vport_id,
+ vlan->vlan_id, 0, false);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "restore vport vlan list failed, ret=%d\n",
+ ret);
+ return ret;
+ }
+ }
+ vlan->hd_tbl_status = true;
+ }
+
+ return 0;
+}
+
+static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ bool is_write_tbl)
{
struct hclge_vport_vlan_cfg *vlan, *tmp;
struct hclge_dev *hdev = vport->back;
{
struct hclge_vport *vport = hclge_get_vport(handle);
- vport->rxvlan_cfg.strip_tag1_en = false;
- vport->rxvlan_cfg.strip_tag2_en = enable;
+ if (vport->port_base_vlan_cfg.state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ vport->rxvlan_cfg.strip_tag1_en = false;
+ vport->rxvlan_cfg.strip_tag2_en = enable;
+ } else {
+ vport->rxvlan_cfg.strip_tag1_en = enable;
+ vport->rxvlan_cfg.strip_tag2_en = true;
+ }
vport->rxvlan_cfg.vlan1_vlan_prionly = false;
vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+ vport->rxvlan_cfg.rx_vlan_offload_en = enable;
return hclge_set_vlan_rx_offload_cfg(vport);
}
+/* Switch the hardware VLAN filter entries of @vport between "port based
+ * VLAN" mode and "VLAN list" mode.
+ *
+ * When @port_base_vlan_state is HNAE3_PORT_BASE_VLAN_ENABLE the vport's
+ * VLAN list entries are removed (the list itself is kept) and the tag in
+ * @new_info is added. For any other state the tag in @old_info is removed
+ * and the entries of the vport's VLAN list are written back.
+ *
+ * Returns 0 on success or the error reported by the failing helper.
+ */
+static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
+					    u16 port_base_vlan_state,
+					    struct hclge_vlan_info *new_info,
+					    struct hclge_vlan_info *old_info)
+{
+	struct hclge_dev *hdev = vport->back;
+	int err;
+
+	if (port_base_vlan_state != HNAE3_PORT_BASE_VLAN_ENABLE) {
+		/* drop the old port based entry, then restore the list */
+		err = hclge_set_vlan_filter_hw(hdev,
+					       htons(old_info->vlan_proto),
+					       vport->vport_id,
+					       old_info->vlan_tag,
+					       old_info->qos, true);
+		if (err)
+			return err;
+
+		return hclge_add_vport_all_vlan_table(vport);
+	}
+
+	/* port based VLAN replaces whatever the list had installed */
+	hclge_rm_vport_all_vlan_table(vport, false);
+
+	return hclge_set_vlan_filter_hw(hdev, htons(new_info->vlan_proto),
+					vport->vport_id, new_info->vlan_tag,
+					new_info->qos, false);
+}
+
+/* Apply a port based VLAN change to @vport.
+ *
+ * Reconfigures VLAN offload for the requested @state and tag, updates the
+ * hardware VLAN filter entries, and finally records @vlan_info in
+ * vport->port_base_vlan_cfg.vlan_info. The cached state (in the vport and in
+ * its nic handle) is only changed for enable/disable requests, not for
+ * HNAE3_PORT_BASE_VLAN_MODIFY.
+ *
+ * Returns 0 on success or the first error returned by a helper.
+ */
+int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ struct hclge_vlan_info *vlan_info)
+{
+ struct hnae3_handle *nic = &vport->nic;
+ struct hclge_vlan_info *old_vlan_info;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ /* points at the cached info, which still holds the previous tag here */
+ old_vlan_info = &vport->port_base_vlan_cfg.vlan_info;
+
+ ret = hclge_vlan_offload_cfg(vport, state, vlan_info->vlan_tag);
+ if (ret)
+ return ret;
+
+ if (state == HNAE3_PORT_BASE_VLAN_MODIFY) {
+ /* add new VLAN tag */
+ ret = hclge_set_vlan_filter_hw(hdev,
+ htons(vlan_info->vlan_proto),
+ vport->vport_id,
+ vlan_info->vlan_tag,
+ vlan_info->qos, false);
+ if (ret)
+ return ret;
+
+ /* remove old VLAN tag */
+ /* NOTE(review): if this removal fails the new tag has already been
+ * added above and the cached vlan_info is not updated - confirm
+ * whether callers tolerate that partial state.
+ */
+ ret = hclge_set_vlan_filter_hw(hdev,
+ htons(old_vlan_info->vlan_proto),
+ vport->vport_id,
+ old_vlan_info->vlan_tag,
+ old_vlan_info->qos, true);
+ if (ret)
+ return ret;
+
+ /* state stays as it is, only the cached tag info changes */
+ goto update;
+ }
+
+ ret = hclge_update_vlan_filter_entries(vport, state, vlan_info,
+ old_vlan_info);
+ if (ret)
+ return ret;
+
+ /* update state only when disable/enable port based VLAN */
+ vport->port_base_vlan_cfg.state = state;
+ if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
+ else
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
+update:
+ /* old_vlan_info aliases this storage, so it is overwritten last */
+ vport->port_base_vlan_cfg.vlan_info.vlan_tag = vlan_info->vlan_tag;
+ vport->port_base_vlan_cfg.vlan_info.qos = vlan_info->qos;
+ vport->port_base_vlan_cfg.vlan_info.vlan_proto = vlan_info->vlan_proto;
+
+ return 0;
+}
+
+/* Work out which port based VLAN transition a request for tag @vlan implies,
+ * given the current @state of @vport:
+ *
+ *   currently disabled, vlan == 0     -> HNAE3_PORT_BASE_VLAN_NOCHANGE
+ *   currently disabled, vlan != 0     -> HNAE3_PORT_BASE_VLAN_ENABLE
+ *   currently enabled,  vlan == 0     -> HNAE3_PORT_BASE_VLAN_DISABLE
+ *   currently enabled,  same tag      -> HNAE3_PORT_BASE_VLAN_NOCHANGE
+ *   currently enabled,  different tag -> HNAE3_PORT_BASE_VLAN_MODIFY
+ */
+static u16 hclge_get_port_base_vlan_state(struct hclge_vport *vport,
+					  enum hnae3_port_base_vlan_state state,
+					  u16 vlan)
+{
+	if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+		return vlan ? HNAE3_PORT_BASE_VLAN_ENABLE :
+			      HNAE3_PORT_BASE_VLAN_NOCHANGE;
+
+	if (!vlan)
+		return HNAE3_PORT_BASE_VLAN_DISABLE;
+
+	if (vport->port_base_vlan_cfg.vlan_info.vlan_tag == vlan)
+		return HNAE3_PORT_BASE_VLAN_NOCHANGE;
+
+	return HNAE3_PORT_BASE_VLAN_MODIFY;
+}
+
+/* Configure the port based VLAN (@vlan, @qos, @proto) of the vport selected
+ * by @vfid.
+ *
+ * Returns -EOPNOTSUPP on PCI revision 0x20, -EINVAL for an out-of-range
+ * vfid/vlan/qos, -EPROTONOSUPPORT for anything but 802.1Q, 0 when the request
+ * does not change the current configuration, otherwise the result of
+ * applying (or pushing to the VF) the new configuration.
+ */
+static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+ u16 vlan, u8 qos, __be16 proto)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_vlan_info vlan_info;
+ u16 state;
+ int ret;
+
+ if (hdev->pdev->revision == 0x20)
+ return -EOPNOTSUPP;
+
+ /* qos is a 3-bit value, so it cannot be bigger than 7 */
+ if (vfid >= hdev->num_alloc_vfs || vlan > VLAN_N_VID - 1 || qos > 7)
+ return -EINVAL;
+ if (proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ /* from here on vport is the target vport, not the caller's one */
+ vport = &hdev->vport[vfid];
+ state = hclge_get_port_base_vlan_state(vport,
+ vport->port_base_vlan_cfg.state,
+ vlan);
+ if (state == HNAE3_PORT_BASE_VLAN_NOCHANGE)
+ return 0;
+
+ vlan_info.vlan_tag = vlan;
+ vlan_info.qos = qos;
+ /* hclge_vlan_info keeps the protocol in host byte order */
+ vlan_info.vlan_proto = ntohs(proto);
+
+ /* update port based VLAN for PF */
+ if (!vfid) {
+ /* the client is taken down around the reconfiguration */
+ hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ ret = hclge_update_port_base_vlan_cfg(vport, state, &vlan_info);
+ hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+
+ return ret;
+ }
+
+ /* a vport that is not alive is updated directly; otherwise the new
+ * configuration is pushed to the VF through the mailbox
+ */
+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) {
+ return hclge_update_port_base_vlan_cfg(vport, state,
+ &vlan_info);
+ } else {
+ ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+ (u8)vfid, state,
+ vlan, qos,
+ ntohs(proto));
+ return ret;
+ }
+}
+
+/* Add (@is_kill false) or remove (@is_kill true) @vlan_id for the vport
+ * behind @handle.
+ *
+ * While port based VLAN is enabled the port based VLAN itself is used as the
+ * VLAN filter entry, so the hardware filter table is left alone and only the
+ * vport VLAN list is updated; such list entries are recorded as not yet
+ * written to hardware. With port based VLAN disabled the hardware table is
+ * updated first and the list is only touched when that succeeded.
+ *
+ * Returns 0 on success or the hardware update's error code.
+ */
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+			  u16 vlan_id, bool is_kill)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	bool written_to_tbl = false;
+	int err;
+
+	if (handle->port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+		err = hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id,
+					       vlan_id, 0, is_kill);
+		if (err)
+			return err;
+
+		written_to_tbl = true;
+	}
+
+	if (is_kill)
+		hclge_rm_vport_vlan_table(vport, vlan_id, false);
+	else
+		hclge_add_vport_vlan_table(vport, vlan_id, written_to_tbl);
+
+	return 0;
+}
+
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps)
{
struct hclge_config_max_frm_size_cmd *req;
*tp_mdix = ETH_TP_MDI;
}
+/* Dump the basic PF configuration (queue/descriptor counts, vport counts,
+ * buffer sizes and the MAIN/DCB/MQPRIO flags) to the kernel log at info
+ * level, bracketed by "PF info begin"/"PF info end" markers.
+ */
+static void hclge_info_show(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+
+	dev_info(dev, "PF info begin:\n");
+
+	dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
+	dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
+	dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
+	dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
+	/* the field is num_vmdq_vport; the message used to say "vmdp" */
+	dev_info(dev, "Numbers of vmdq vports: %d\n", hdev->num_vmdq_vport);
+	dev_info(dev, "Numbers of VF for this PF: %d\n", hdev->num_req_vfs);
+	dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
+	dev_info(dev, "Total buffer size for TX/RX: %d\n", hdev->pkt_buf_size);
+	dev_info(dev, "TX buffer size for each TC: %d\n", hdev->tx_buf_size);
+	dev_info(dev, "DV buffer size for each TC: %d\n", hdev->dv_buf_size);
+	dev_info(dev, "This is %s PF\n",
+		 hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
+	dev_info(dev, "DCB %s\n",
+		 hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
+	dev_info(dev, "MQPRIO %s\n",
+		 hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+
+	dev_info(dev, "PF info end.\n");
+}
+
static int hclge_init_client_instance(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
hnae3_set_client_init_flag(client, ae_dev, 1);
+ if (netif_msg_drv(&hdev->vport->nic))
+ hclge_info_show(hdev);
+
if (hdev->roce_client &&
hnae3_dev_roce_supported(hdev)) {
struct hnae3_client *rc = hdev->roce_client;
goto err_mdiobus_unreg;
}
+ INIT_KFIFO(hdev->mac_tnl_log);
+
hclge_dcb_ops_set(hdev);
timer_setup(&hdev->service_timer, hclge_service_timer, 0);
int i;
for (i = 0; i < hdev->num_alloc_vport; i++) {
- hclge_vport_start(vport);
+ hclge_vport_stop(vport);
vport++;
}
}
hclge_enable_vector(&hdev->misc_vector, false);
synchronize_irq(hdev->misc_vector.vector_irq);
+ hclge_config_mac_tnl_int(hdev, false);
hclge_hw_error_set_state(hdev, false);
hclge_cmd_uninit(hdev);
hclge_misc_irq_uninit(hdev);
.set_mtu = hclge_set_mtu,
.reset_queue = hclge_reset_tqp,
.get_stats = hclge_get_stats,
+ .get_mac_pause_stats = hclge_get_mac_pause_stat,
.update_stats = hclge_update_stats,
.get_strings = hclge_get_strings,
.get_sset_count = hclge_get_sset_count,
#include <linux/types.h>
#include <linux/phy.h>
#include <linux/if_vlan.h>
+#include <linux/kfifo.h>
#include "hclge_cmd.h"
#include "hnae3.h"
u16 vlan_id;
};
+/* Per-device reset counters */
+struct hclge_rst_stats {
+ u32 reset_done_cnt; /* the number of resets that have completed */
+ u32 hw_reset_done_cnt; /* the number of HW resets that have completed */
+ u32 pf_rst_cnt; /* the number of PF resets */
+ u32 flr_rst_cnt; /* the number of FLRs */
+ u32 core_rst_cnt; /* the number of CORE resets */
+ u32 global_rst_cnt; /* the number of GLOBAL resets */
+ u32 imp_rst_cnt; /* the number of IMP resets */
+ u32 reset_cnt; /* the number of resets */
+};
+
+/* Time and register status recorded when a MAC tunnel interrupt occurs */
+struct hclge_mac_tnl_stats {
+ u64 time;
+ u32 status;
+};
+
/* For each bit of TCAM entry, it uses a pair of 'x' and
* 'y' to indicate which value to match, like below:
* ----------------------------------
(y) = (_k_ ^ ~_v_) & (_k_); \
} while (0)
+#define HCLGE_MAC_TNL_LOG_SIZE 8
#define HCLGE_VPORT_NUM 256
struct hclge_dev {
struct pci_dev *pdev;
unsigned long default_reset_request;
unsigned long reset_request; /* reset has been requested */
unsigned long reset_pending; /* client rst is pending to be served */
- unsigned long reset_count; /* the number of reset has been done */
+ struct hclge_rst_stats rst_stats;
u32 reset_fail_cnt;
u32 fw_version;
u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */
struct mutex umv_mutex; /* protect share_umv_size */
struct mutex vport_cfg_mutex; /* Protect stored vf table */
+
+ DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
+ HCLGE_MAC_TNL_LOG_SIZE);
};
/* VPort level vlan tag configuration for TX direction */
/* VPort level vlan tag configuration for RX direction */
struct hclge_rx_vtag_cfg {
- bool strip_tag1_en; /* Whether strip inner vlan tag */
- bool strip_tag2_en; /* Whether strip outer vlan tag */
- bool vlan1_vlan_prionly;/* Inner VLAN Tag up to descriptor Enable */
- bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
+ u8 rx_vlan_offload_en; /* Whether enable rx vlan offload */
+ u8 strip_tag1_en; /* Whether strip inner vlan tag */
+ u8 strip_tag2_en; /* Whether strip outer vlan tag */
+ u8 vlan1_vlan_prionly; /* Inner VLAN Tag up to descriptor Enable */
+ u8 vlan2_vlan_prionly; /* Outer VLAN Tag up to descriptor Enable */
};
struct hclge_rss_tuple_cfg {
HCLGE_VPORT_STATE_MAX
};
+/* Description of one VLAN: protocol, priority (qos) and tag */
+struct hclge_vlan_info {
+ u16 vlan_proto; /* so far support 802.1Q only */
+ u16 qos;
+ u16 vlan_tag;
+};
+
+/* Port based VLAN configuration kept per vport */
+struct hclge_port_base_vlan_config {
+ u16 state; /* presumably an HNAE3_PORT_BASE_VLAN_* value - TODO confirm */
+ struct hclge_vlan_info vlan_info;
+};
+
struct hclge_vport {
u16 alloc_tqps; /* Allocated Tx/Rx queues */
u16 alloc_rss_size;
u16 qs_offset;
- u16 bw_limit; /* VSI BW Limit (0 = disabled) */
+ u32 bw_limit; /* VSI BW Limit (0 = disabled) */
u8 dwrr;
+ struct hclge_port_base_vlan_config port_base_vlan_cfg;
struct hclge_tx_vtag_cfg txvlan_cfg;
struct hclge_rx_vtag_cfg rxvlan_cfg;
void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
enum HCLGE_MAC_ADDR_TYPE mac_type);
void hclge_uninit_vport_mac_table(struct hclge_dev *hdev);
-void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id);
-void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
- bool is_write_tbl);
void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list);
void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev);
+int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ struct hclge_vlan_info *vlan_info);
+int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
+ u16 state, u16 vlan_tag, u16 qos,
+ u16 vlan_proto);
#endif
}
static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
- bool gen_resp)
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
{
const u8 *mac_addr = (const u8 *)(&mbx_req->msg[2]);
struct hclge_dev *hdev = vport->back;
return -EIO;
}
- if (gen_resp)
+ if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT)
hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
return 0;
return 0;
}
+/* Send a HLCGE_MBX_PUSH_VLAN_INFO mailbox message to VF @vfid.
+ *
+ * The 8-byte payload holds four u16 values in CPU byte order, in this
+ * order: state, vlan_proto, qos, vlan_tag.
+ *
+ * Returns whatever hclge_send_mbx_msg() returns.
+ */
+int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
+				      u16 state, u16 vlan_tag, u16 qos,
+				      u16 vlan_proto)
+{
+#define MSG_DATA_SIZE 8
+
+	const u16 fields[] = { state, vlan_proto, qos, vlan_tag };
+	u8 msg_data[MSG_DATA_SIZE];
+
+	/* fields[] is exactly MSG_DATA_SIZE bytes: 4 entries of 2 bytes */
+	memcpy(msg_data, fields, sizeof(msg_data));
+
+	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
+				  HLCGE_MBX_PUSH_VLAN_INFO, vfid);
+}
+
static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
- bool gen_resp)
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
{
int status = 0;
memcpy(&proto, &mbx_req->msg[5], sizeof(proto));
status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
vlan, is_kill);
- if (!status)
- is_kill ? hclge_rm_vport_vlan_table(vport, vlan, false)
- : hclge_add_vport_vlan_table(vport, vlan);
} else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) {
struct hnae3_handle *handle = &vport->nic;
bool en = mbx_req->msg[2] ? true : false;
status = hclge_en_hw_strip_rxvtag(handle, en);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_PORT_BASE_VLAN_CFG) {
+ struct hclge_vlan_info *vlan_info;
+ u16 *state;
+
+ state = (u16 *)&mbx_req->msg[2];
+ vlan_info = (struct hclge_vlan_info *)&mbx_req->msg[4];
+ status = hclge_update_port_base_vlan_cfg(vport, *state,
+ vlan_info);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) {
+ u8 state;
+
+ state = vport->port_base_vlan_cfg.state;
+ status = hclge_gen_resp_to_vf(vport, mbx_req, 0, &state,
+ sizeof(u8));
}
- if (gen_resp)
- status = hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
-
return status;
}
ret);
break;
case HCLGE_MBX_SET_UNICAST:
- ret = hclge_set_vf_uc_mac_addr(vport, req, true);
+ ret = hclge_set_vf_uc_mac_addr(vport, req);
if (ret)
dev_err(&hdev->pdev->dev,
"PF fail(%d) to set VF UC MAC Addr\n",
ret);
break;
case HCLGE_MBX_SET_VLAN:
- ret = hclge_set_vf_vlan_cfg(vport, req, false);
+ ret = hclge_set_vf_vlan_cfg(vport, req);
if (ret)
dev_err(&hdev->pdev->dev,
"PF failed(%d) to config VF's VLAN\n",
#include <linux/etherdevice.h>
#include <linux/kernel.h>
+#include <linux/marvell_phy.h>
#include "hclge_cmd.h"
#include "hclge_main.h"
int hclge_mac_mdio_config(struct hclge_dev *hdev)
{
+#define PHY_INEXISTENT 255
+
struct hclge_mac *mac = &hdev->hw.mac;
struct phy_device *phydev;
struct mii_bus *mdio_bus;
int ret;
- if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
+ if (hdev->hw.mac.phy_addr == PHY_INEXISTENT) {
+ dev_info(&hdev->pdev->dev,
+ "no phy device is connected to mdio bus\n");
+ return 0;
+ } else if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
hdev->hw.mac.phy_addr);
return -EINVAL;
linkmode_clear_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported);
+ phydev->dev_flags |= MARVELL_PHY_LED0_LINK_LED1_ACTIVE;
+
ret = phy_connect_direct(netdev, phydev,
hclge_mac_adjust_link,
PHY_INTERFACE_MODE_SGMII);
ret = hclge_pfc_setup_hw(hdev);
if (init && ret == -EOPNOTSUPP)
dev_warn(&hdev->pdev->dev, "GE MAC does not support pfc\n");
- else
+ else if (ret) {
+ dev_err(&hdev->pdev->dev, "config pfc failed! ret = %d\n",
+ ret);
return ret;
+ }
return hclge_tm_bp_setup(hdev);
}
int ret;
spin_lock_bh(&hdev->hw.cmq.csq.lock);
- spin_lock_bh(&hdev->hw.cmq.crq.lock);
+ spin_lock(&hdev->hw.cmq.crq.lock);
/* initialize the pointers of async rx queue of mailbox */
hdev->arq.hdev = hdev;
hdev->arq.head = 0;
hdev->arq.tail = 0;
- hdev->arq.count = 0;
+ atomic_set(&hdev->arq.count, 0);
hdev->hw.cmq.csq.next_to_clean = 0;
hdev->hw.cmq.csq.next_to_use = 0;
hdev->hw.cmq.crq.next_to_clean = 0;
hclgevf_cmd_init_regs(&hdev->hw);
- spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+ spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
* reset may happen when lower level reset is being processed.
*/
if (hclgevf_is_reset_pending(hdev)) {
- set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
- return -EBUSY;
+ ret = -EBUSY;
+ goto err_cmd_init;
}
/* get firmware version */
if (ret) {
dev_err(&hdev->pdev->dev,
"failed(%d) to query firmware version\n", ret);
- return ret;
+ goto err_cmd_init;
}
hdev->fw_version = version;
dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
return 0;
+
+err_cmd_init:
+ set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+
+ return ret;
}
static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw)
return 0;
}
+/* Ask the PF (HCLGE_MBX_SET_VLAN / HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) for
+ * the port based VLAN state of this VF and store the one-byte answer in
+ * hdev->nic.port_base_vlan_state.
+ *
+ * Returns 0 on success or the mailbox error code; the cached state is left
+ * untouched on failure.
+ */
+static int hclgevf_get_port_base_vlan_filter_state(struct hclgevf_dev *hdev)
+{
+	struct hnae3_handle *nic = &hdev->nic;
+	u8 resp_msg;
+	int ret;
+
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+				   HCLGE_MBX_GET_PORT_BASE_VLAN_STATE,
+				   NULL, 0, true, &resp_msg, sizeof(u8));
+	if (ret) {
+		/* terminate the message so it is not merged with the next */
+		dev_err(&hdev->pdev->dev,
+			"VF request to get port based vlan state failed %d\n",
+			ret);
+		return ret;
+	}
+
+	nic->port_base_vlan_state = resp_msg;
+
+	return 0;
+}
+
static int hclgevf_get_queue_info(struct hclgevf_dev *hdev)
{
#define HCLGEVF_TQPS_RSS_INFO_LEN 6
case HNAE3_VF_FUNC_RESET:
ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
0, true, NULL, sizeof(u8));
+ hdev->rst_stats.vf_func_rst_cnt++;
break;
case HNAE3_FLR_RESET:
set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
+ hdev->rst_stats.flr_rst_cnt++;
break;
default:
break;
* know if device is undergoing reset
*/
ae_dev->reset_type = hdev->reset_type;
- hdev->reset_count++;
+ hdev->rst_stats.rst_cnt++;
rtnl_lock();
/* bring down the nic to stop any ongoing TX/RX */
goto err_reset;
}
+ hdev->rst_stats.hw_rst_done_cnt++;
+
rtnl_lock();
/* now, re-initialize the nic client and ae device*/
hdev->last_reset_time = jiffies;
ae_dev->reset_type = HNAE3_NONE_RESET;
+ hdev->rst_stats.rst_done_cnt++;
return ret;
err_reset_lock:
*/
hclgevf_cmd_init(hdev);
dev_err(&hdev->pdev->dev, "failed to reset VF\n");
+ if (hclgevf_is_reset_pending(hdev))
+ hclgevf_reset_task_schedule(hdev);
return ret;
}
void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
- !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+ if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) {
set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
schedule_work(&hdev->rst_service_task);
}
mod_timer(&hdev->service_timer, jiffies + 5 * HZ);
+ hdev->stats_timer++;
hclgevf_task_schedule(hdev);
}
hdev = container_of(work, struct hclgevf_dev, keep_alive_task);
- if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+ if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state))
return;
ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_KEEP_ALIVE, 0, NULL,
static void hclgevf_service_task(struct work_struct *work)
{
+ struct hnae3_handle *handle;
struct hclgevf_dev *hdev;
hdev = container_of(work, struct hclgevf_dev, service_task);
+ handle = &hdev->nic;
+
+ if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) {
+ hclgevf_tqps_update_stats(handle);
+ hdev->stats_timer = 0;
+ }
/* request the link status from the PF. PF would be able to tell VF
* about such updates in future so we might remove this later
set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
cmdq_src_reg &= ~BIT(HCLGEVF_VECTOR0_RST_INT_B);
*clearval = cmdq_src_reg;
+ hdev->rst_stats.vf_rst_cnt++;
return HCLGEVF_VECTOR0_EVENT_RST;
}
{
int ret;
+ /* get current port based vlan state from PF */
+ ret = hclgevf_get_port_base_vlan_filter_state(hdev);
+ if (ret)
+ return ret;
+
/* get queue configuration from PF */
ret = hclgevf_get_queue_info(hdev);
if (ret)
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
- for (i = 0; i < handle->kinfo.num_tqps; i++)
- hclgevf_reset_tqp(handle, i);
+ if (hdev->reset_type != HNAE3_VF_RESET)
+ for (i = 0; i < handle->kinfo.num_tqps; i++)
+ if (hclgevf_reset_tqp(handle, i))
+ break;
/* reset tqp stats */
hclgevf_reset_tqp_stats(handle);
static int hclgevf_client_start(struct hnae3_handle *handle)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ int ret;
+
+ ret = hclgevf_set_alive(handle, true);
+ if (ret)
+ return ret;
mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ);
- return hclgevf_set_alive(handle, true);
+
+ return 0;
}
static void hclgevf_client_stop(struct hnae3_handle *handle)
{
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+ if (hdev->keep_alive_timer.function)
+ del_timer_sync(&hdev->keep_alive_timer);
+ if (hdev->keep_alive_task.func)
+ cancel_work_sync(&hdev->keep_alive_task);
if (hdev->service_timer.function)
del_timer_sync(&hdev->service_timer);
if (hdev->service_task.func)
hclgevf_free_vector(hdev, 0);
}
+/* Log the basic VF configuration (queue and descriptor counts, vport count,
+ * HW tc map and the PF media type) at info level, bracketed by
+ * "VF info begin"/"VF info end" markers.
+ */
+static void hclgevf_info_show(struct hclgevf_dev *hdev)
+{
+	struct device *log_dev = &hdev->pdev->dev;
+
+	dev_info(log_dev, "VF info begin:\n");
+
+	dev_info(log_dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
+	dev_info(log_dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
+	dev_info(log_dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
+	dev_info(log_dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
+	dev_info(log_dev, "HW tc map: %d\n", hdev->hw_tc_map);
+	dev_info(log_dev, "PF media type of this VF: %d\n",
+		 hdev->hw.mac.media_type);
+
+	dev_info(log_dev, "VF info end.\n");
+}
+
static int hclgevf_init_client_instance(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
hnae3_set_client_init_flag(client, ae_dev, 1);
+ if (netif_msg_drv(&hdev->nic))
+ hclgevf_info_show(hdev);
+
if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) {
struct hnae3_client *rc = hdev->roce_client;
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
- return hdev->reset_count;
+ return hdev->rst_stats.hw_rst_done_cnt;
}
static void hclgevf_get_link_mode(struct hnae3_handle *handle,
}
}
+/* Handle a port based VLAN update for this VF: take the client down, forward
+ * @port_base_vlan_info (@data_size bytes) to the PF with a
+ * HCLGE_MBX_SET_VLAN / HCLGE_MBX_PORT_BASE_VLAN_CFG message, record the new
+ * state in the nic handle and bring the client back up.
+ */
+void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
+ u8 *port_base_vlan_info, u8 data_size)
+{
+ struct hnae3_handle *nic = &hdev->nic;
+
+ rtnl_lock();
+ hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ rtnl_unlock();
+
+ /* send msg to PF so that it updates the port based vlan info */
+ /* NOTE(review): the return value is discarded and the call passes
+ * false/NULL/0 for what look like the response arguments, so this does
+ * not appear to wait for the PF - confirm this is intended.
+ */
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+ HCLGE_MBX_PORT_BASE_VLAN_CFG,
+ port_base_vlan_info, data_size,
+ false, NULL, 0);
+
+ /* every state other than DISABLE is cached as ENABLE */
+ if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
+ else
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
+ rtnl_lock();
+ hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+ rtnl_unlock();
+}
+
static const struct hnae3_ae_ops hclgevf_ops = {
.init_ae_dev = hclgevf_init_ae_dev,
.uninit_ae_dev = hclgevf_uninit_ae_dev,
#define HCLGEVF_S_IP_BIT BIT(3)
#define HCLGEVF_V_TAG_BIT BIT(4)
+#define HCLGEVF_STATS_TIMER_INTERVAL (36)
+
enum hclgevf_evt_cause {
HCLGEVF_VECTOR0_EVENT_RST,
HCLGEVF_VECTOR0_EVENT_MBX,
int vector_irq;
};
+/* Per-device reset counters of the VF driver */
+struct hclgevf_rst_stats {
+ u32 rst_cnt; /* the number of resets */
+ u32 vf_func_rst_cnt; /* the number of VF function resets */
+ u32 flr_rst_cnt; /* the number of FLRs */
+ u32 vf_rst_cnt; /* the number of VF resets */
+ u32 rst_done_cnt; /* the number of resets completed */
+ u32 hw_rst_done_cnt; /* the number of HW resets completed */
+};
+
struct hclgevf_dev {
struct pci_dev *pdev;
struct hnae3_ae_dev *ae_dev;
#define HCLGEVF_RESET_REQUESTED 0
#define HCLGEVF_RESET_PENDING 1
unsigned long reset_state; /* requested, pending */
- unsigned long reset_count; /* the number of reset has been done */
+ struct hclgevf_rst_stats rst_stats;
u32 reset_attempts;
u32 fw_version;
struct hnae3_client *nic_client;
struct hnae3_client *roce_client;
u32 flag;
+ u32 stats_timer;
};
static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev)
u8 duplex);
void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
+ u8 *port_base_vlan_info, u8 data_size);
#endif
if (i >= HCLGEVF_MAX_TRY_TIMES) {
dev_err(&hdev->pdev->dev,
- "VF could not get mbx resp(=%d) from PF in %d tries\n",
- hdev->mbx_resp.received_resp, i);
+ "VF could not get mbx(%d,%d) resp(=%d) from PF in %d tries\n",
+ code0, code1, hdev->mbx_resp.received_resp, i);
return -EIO;
}
if (!(r_code0 == code0 && r_code1 == code1 && !mbx_resp->resp_status)) {
dev_err(&hdev->pdev->dev,
- "VF could not match resp code(code0=%d,code1=%d), %d",
+ "VF could not match resp code(code0=%d,code1=%d), %d\n",
code0, code1, mbx_resp->resp_status);
+ dev_err(&hdev->pdev->dev,
+ "VF could not match resp r_code(r_code0=%d,r_code1=%d)\n",
+ r_code0, r_code1);
return -EIO;
}
}
hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
+ req->mbx_need_resp |= need_resp ? HCLGE_MBX_NEED_RESP_BIT :
+ ~HCLGE_MBX_NEED_RESP_BIT;
req->msg[0] = code;
req->msg[1] = subcode;
memcpy(&req->msg[2], msg_data, msg_len);
case HCLGE_MBX_LINK_STAT_CHANGE:
case HCLGE_MBX_ASSERTING_RESET:
case HCLGE_MBX_LINK_STAT_MODE:
+ case HLCGE_MBX_PUSH_VLAN_INFO:
/* set this mbx event as pending. This is required as we
* might loose interrupt event when mbx task is busy
* handling. This shall be cleared when mbx task just
/* we will drop the async msg if we find ARQ as full
* and continue with next message
*/
- if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+ if (atomic_read(&hdev->arq.count) >=
+ HCLGE_MBX_MAX_ARQ_MSG_NUM) {
dev_warn(&hdev->pdev->dev,
"Async Q full, dropping msg(%d)\n",
req->msg[1]);
memcpy(&msg_q[0], req->msg,
HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
hclge_mbx_tail_ptr_move_arq(hdev->arq);
- hdev->arq.count++;
+ atomic_inc(&hdev->arq.count);
hclgevf_mbx_task_schedule(hdev);
void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
{
enum hnae3_reset_type reset_type;
- u16 link_status;
- u16 *msg_q;
+ u16 link_status, state;
+ u16 *msg_q, *vlan_info;
u8 duplex;
u32 speed;
u32 tail;
hclgevf_reset_task_schedule(hdev);
break;
+ case HLCGE_MBX_PUSH_VLAN_INFO:
+ state = le16_to_cpu(msg_q[1]);
+ vlan_info = &msg_q[1];
+ hclgevf_update_port_base_vlan_info(hdev, state,
+ (u8 *)vlan_info, 8);
+ break;
default:
dev_err(&hdev->pdev->dev,
"fetched unsupported(%d) message from arq\n",
}
hclge_mbx_head_ptr_move_arq(hdev->arq);
- hdev->arq.count--;
+ atomic_dec(&hdev->arq.count);
msg_q = hdev->arq.msg_q[hdev->arq.head];
}
}
{
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
+ netdev_features_t old_hw_features = 0;
union ibmvnic_crq crq;
int i;
adapter->ip_offload_ctrl.large_rx_ipv4 = 0;
adapter->ip_offload_ctrl.large_rx_ipv6 = 0;
- adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO;
+ if (adapter->state != VNIC_PROBING) {
+ old_hw_features = adapter->netdev->hw_features;
+ adapter->netdev->hw_features = 0;
+ }
+
+ adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
- adapter->netdev->features |= NETIF_F_IP_CSUM;
+ adapter->netdev->hw_features |= NETIF_F_IP_CSUM;
if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum)
- adapter->netdev->features |= NETIF_F_IPV6_CSUM;
+ adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM;
if ((adapter->netdev->features &
(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
- adapter->netdev->features |= NETIF_F_RXCSUM;
+ adapter->netdev->hw_features |= NETIF_F_RXCSUM;
if (buf->large_tx_ipv4)
- adapter->netdev->features |= NETIF_F_TSO;
+ adapter->netdev->hw_features |= NETIF_F_TSO;
if (buf->large_tx_ipv6)
- adapter->netdev->features |= NETIF_F_TSO6;
+ adapter->netdev->hw_features |= NETIF_F_TSO6;
- adapter->netdev->hw_features |= adapter->netdev->features;
+ if (adapter->state == VNIC_PROBING) {
+ adapter->netdev->features |= adapter->netdev->hw_features;
+ } else if (old_hw_features != adapter->netdev->hw_features) {
+ netdev_features_t tmp = 0;
+
+ /* disable features no longer supported */
+ adapter->netdev->features &= adapter->netdev->hw_features;
+ /* turn on features now supported if previously enabled */
+ tmp = (old_hw_features ^ adapter->netdev->hw_features) &
+ adapter->netdev->hw_features;
+ adapter->netdev->features |=
+ tmp & adapter->netdev->wanted_features;
+ }
memset(&crq, 0, sizeof(crq));
crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
i40e_diag.o \
i40e_txrx.o \
i40e_ptp.o \
+ i40e_ddp.o \
i40e_client.o \
i40e_virtchnl_pf.o \
i40e_xsk.o
u8 filter_index;
};
+/* Constants used by the DDP profile code */
+#define I40_DDP_FLASH_REGION 100
+#define I40E_PROFILE_INFO_SIZE 48
+#define I40E_MAX_PROFILE_NUM 16
+/* Room for I40E_MAX_PROFILE_NUM profile info records plus 4 extra bytes
+ * (presumably the u32 p_count of struct i40e_ddp_profile_list - TODO confirm)
+ */
+#define I40E_PROFILE_LIST_SIZE \
+ (I40E_PROFILE_INFO_SIZE * I40E_MAX_PROFILE_NUM + 4)
+#define I40E_DDP_PROFILE_PATH "intel/i40e/ddp/"
+#define I40E_DDP_PROFILE_NAME_MAX 64
+
+int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+ bool is_add);
+int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash);
+
+/* Profile list header: a count followed by a variable number of profile
+ * info records (p_count presumably gives the number of p_info entries -
+ * TODO confirm). The trailing array is a C99 flexible array member rather
+ * than a GNU zero-length array; sizeof() of the struct is unchanged.
+ */
+struct i40e_ddp_profile_list {
+	u32 p_count;
+	struct i40e_profile_info p_info[];
+};
+
+/* List node holding a saved DDP profile image: old_ddp_size is stored next
+ * to the variable-length buffer old_ddp_buf (presumably its length in bytes -
+ * TODO confirm). The buffer is a C99 flexible array member rather than a GNU
+ * zero-length array; sizeof() of the struct is unchanged.
+ */
+struct i40e_ddp_old_profile_list {
+	struct list_head list;
+	size_t old_ddp_size;
+	u8 old_ddp_buf[];
+};
+
/* macros related to FLX_PIT */
#define I40E_FLEX_SET_FSIZE(fsize) (((fsize) << \
I40E_PRTQF_FLX_PIT_FSIZE_SHIFT) & \
struct sk_buff *ptp_tx_skb;
unsigned long ptp_tx_start;
struct hwtstamp_config tstamp_config;
+ struct timespec64 ptp_prev_hw_time;
+ ktime_t ptp_reset_start;
struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
u32 ptp_adj_mult;
u32 tx_hwtstamp_timeouts;
u16 override_q_count;
u16 last_sw_conf_flags;
u16 last_sw_conf_valid_flags;
+ /* List to keep previous DDP profiles to be rolled back in the future */
+ struct list_head ddp_old_prof;
};
/**
void i40e_ptp_set_increment(struct i40e_pf *pf);
int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr);
int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr);
+void i40e_ptp_save_hw_time(struct i40e_pf *pf);
+void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
void i40e_ptp_init(struct i40e_pf *pf);
void i40e_ptp_stop(struct i40e_pf *pf);
int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
if (val >= hw->aq.num_asq_entries) {
i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
"AQTX: head overrun at %d\n", val);
- status = I40E_ERR_QUEUE_EMPTY;
+ status = I40E_ERR_ADMIN_QUEUE_FULL;
goto asq_send_command_error;
}
*/
#define I40E_FW_API_VERSION_MAJOR 0x0001
-#define I40E_FW_API_VERSION_MINOR_X722 0x0006
-#define I40E_FW_API_VERSION_MINOR_X710 0x0007
+#define I40E_FW_API_VERSION_MINOR_X722 0x0008
+#define I40E_FW_API_VERSION_MINOR_X710 0x0008
#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
I40E_FW_API_VERSION_MINOR_X710 : \
**/
u32 i40e_led_get(struct i40e_hw *hw)
{
- u32 current_mode = 0;
u32 mode = 0;
int i;
if (!gpio_val)
continue;
- /* ignore gpio LED src mode entries related to the activity
- * LEDs
- */
- current_mode = ((gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
- >> I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT);
- switch (current_mode) {
- case I40E_COMBINED_ACTIVITY:
- case I40E_FILTER_ACTIVITY:
- case I40E_MAC_ACTIVITY:
- case I40E_LINK_ACTIVITY:
- continue;
- default:
- break;
- }
-
mode = (gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK) >>
I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT;
break;
**/
void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
{
- u32 current_mode = 0;
int i;
if (mode & 0xfffffff0)
if (!gpio_val)
continue;
-
- /* ignore gpio LED src mode entries related to the activity
- * LEDs
- */
- current_mode = ((gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
- >> I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT);
- switch (current_mode) {
- case I40E_COMBINED_ACTIVITY:
- case I40E_FILTER_ACTIVITY:
- case I40E_MAC_ACTIVITY:
- case I40E_LINK_ACTIVITY:
- continue;
- default:
- break;
- }
-
gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
/* this & is a bit of paranoia, but serves as a range check */
gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
return NULL;
}
+/* Get section table in profile.
+ *
+ * Sets sec_tbl to the section table of the given profile segment. Starting
+ * from the end of the device table (device_table_count entries), the macro
+ * treats the following memory as a u32 array "nvm" and places the section
+ * table nvm[0] + 1 words further on, i.e. just past nvm[0] and the nvm[0]
+ * words that follow it.
+ *
+ * The macro declares block-local variables named p, count and nvm, so the
+ * arguments must not be expressions that rely on identifiers with those
+ * names.
+ */
+#define I40E_SECTION_TABLE(profile, sec_tbl) \
+ do { \
+ struct i40e_profile_segment *p = (profile); \
+ u32 count; \
+ u32 *nvm; \
+ count = p->device_table_count; \
+ nvm = (u32 *)&p->device_table[count]; \
+ sec_tbl = (struct i40e_section_table *)&nvm[nvm[0] + 1]; \
+ } while (0)
+
+/* Get section header in profile.
+ *
+ * Evaluates to a pointer to the section header located offset bytes from
+ * the start of profile. The whole expansion is parenthesized so the result
+ * can be used directly inside a larger expression (for example followed by
+ * "->") without the cast binding to the wrong operand.
+ */
+#define I40E_SECTION_HEADER(profile, offset) \
+	((struct i40e_profile_section_header *)((u8 *)(profile) + (offset)))
+
+/**
+ * i40e_find_section_in_profile
+ * @section_type: the section type to search for (e.g., SECTION_TYPE_NOTE)
+ * @profile: pointer to the i40e segment header to be searched
+ *
+ * This function searches an i40e segment for a particular section type by
+ * walking the segment's section table in order. On success it returns a
+ * pointer to the first section header whose type matches, otherwise it
+ * returns NULL. NULL is also returned when the segment header type is not
+ * SEGMENT_TYPE_I40E.
+ **/
+struct i40e_profile_section_header *
+i40e_find_section_in_profile(u32 section_type,
+ struct i40e_profile_segment *profile)
+{
+ struct i40e_profile_section_header *sec;
+ struct i40e_section_table *sec_tbl;
+ u32 sec_off;
+ u32 i;
+
+ if (profile->header.type != SEGMENT_TYPE_I40E)
+ return NULL;
+
+ I40E_SECTION_TABLE(profile, sec_tbl);
+
+ for (i = 0; i < sec_tbl->section_count; i++) {
+ sec_off = sec_tbl->section_offset[i];
+ sec = I40E_SECTION_HEADER(profile, sec_off);
+ if (sec->section.type == section_type)
+ return sec;
+ }
+
+ return NULL;
+}
+
+/**
+ * i40e_ddp_exec_aq_section - Execute generic AQ for DDP
+ * @hw: pointer to the hw struct
+ * @aq: command buffer containing all data to execute AQ
+ *
+ * Builds an admin queue descriptor from @aq (opcode, flags and the raw
+ * parameter bytes). When aq->datalen is non-zero, aq->data is attached as
+ * the command buffer and the BUF and RD flags are set, plus the LB flag if
+ * the length exceeds I40E_AQ_LARGE_BUF. The descriptor is then sent with
+ * i40e_asq_send_command().
+ *
+ * On success the parameter bytes of the returned descriptor are copied back
+ * into aq->param and 0 is returned. On failure the status reported by
+ * i40e_asq_send_command() is returned and aq->param is left untouched.
+ **/
+static enum
+i40e_status_code i40e_ddp_exec_aq_section(struct i40e_hw *hw,
+ struct i40e_profile_aq_section *aq)
+{
+ i40e_status status;
+ struct i40e_aq_desc desc;
+ u8 *msg = NULL;
+ u16 msglen;
+
+ i40e_fill_default_direct_cmd_desc(&desc, aq->opcode);
+ desc.flags |= cpu_to_le16(aq->flags);
+ memcpy(desc.params.raw, aq->param, sizeof(desc.params.raw));
+
+ msglen = aq->datalen;
+ if (msglen) {
+ desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
+ I40E_AQ_FLAG_RD));
+ if (msglen > I40E_AQ_LARGE_BUF)
+ desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = cpu_to_le16(msglen);
+ msg = &aq->data[0];
+ }
+
+ status = i40e_asq_send_command(hw, &desc, msg, msglen, NULL);
+
+ if (status) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE,
+ "unable to exec DDP AQ opcode %u, error %d\n",
+ aq->opcode, status);
+ return status;
+ }
+
+ /* copy returned desc to aq_buf */
+ memcpy(aq->param, desc.params.raw, sizeof(desc.params.raw));
+
+ return 0;
+}
+
+/**
+ * i40e_validate_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be validated
+ * @track_id: package tracking id
+ * @rollback: flag if the profile is for rollback.
+ *
+ * Validates supported devices and profile's sections.
+ *
+ * Returns I40E_NOT_SUPPORTED when @track_id is I40E_DDP_TRACKID_INVALID, or
+ * when a section type does not fit the requested direction: MMIO, AQ or
+ * RB_AQ sections in a rollback package, RB_AQ or RB_MMIO sections in an
+ * original package. Returns I40E_ERR_DEVICE_NOT_SUPPORTED when the device
+ * table is not empty and no entry carries the Intel vendor id together with
+ * hw->device_id; an empty device table is accepted. Returns 0 otherwise.
+ */
+static enum i40e_status_code
+i40e_validate_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u32 track_id, bool rollback)
+{
+ struct i40e_profile_section_header *sec = NULL;
+ i40e_status status = 0;
+ struct i40e_section_table *sec_tbl;
+ u32 vendor_dev_id;
+ u32 dev_cnt;
+ u32 sec_off;
+ u32 i;
+
+ if (track_id == I40E_DDP_TRACKID_INVALID) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE, "Invalid track_id\n");
+ return I40E_NOT_SUPPORTED;
+ }
+
+ dev_cnt = profile->device_table_count;
+ for (i = 0; i < dev_cnt; i++) {
+ vendor_dev_id = profile->device_table[i].vendor_dev_id;
+ if ((vendor_dev_id >> 16) == PCI_VENDOR_ID_INTEL &&
+ hw->device_id == (vendor_dev_id & 0xFFFF))
+ break;
+ }
+ if (dev_cnt && i == dev_cnt) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE,
+ "Device doesn't support DDP\n");
+ return I40E_ERR_DEVICE_NOT_SUPPORTED;
+ }
+
+ I40E_SECTION_TABLE(profile, sec_tbl);
+
+ /* Validate sections types */
+ for (i = 0; i < sec_tbl->section_count; i++) {
+ sec_off = sec_tbl->section_offset[i];
+ sec = I40E_SECTION_HEADER(profile, sec_off);
+ if (rollback) {
+ if (sec->section.type == SECTION_TYPE_MMIO ||
+ sec->section.type == SECTION_TYPE_AQ ||
+ sec->section.type == SECTION_TYPE_RB_AQ) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE,
+ "Not a roll-back package\n");
+ return I40E_NOT_SUPPORTED;
+ }
+ } else {
+ if (sec->section.type == SECTION_TYPE_RB_AQ ||
+ sec->section.type == SECTION_TYPE_RB_MMIO) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE,
+ "Not an original package\n");
+ return I40E_NOT_SUPPORTED;
+ }
+ }
+ }
+
+ return status;
+}
+
/**
* i40e_write_profile
* @hw: pointer to the hardware structure
i40e_status status = 0;
struct i40e_section_table *sec_tbl;
struct i40e_profile_section_header *sec = NULL;
- u32 dev_cnt;
- u32 vendor_dev_id;
- u32 *nvm;
+ struct i40e_profile_aq_section *ddp_aq;
u32 section_size = 0;
u32 offset = 0, info = 0;
+ u32 sec_off;
u32 i;
- dev_cnt = profile->device_table_count;
+ status = i40e_validate_profile(hw, profile, track_id, false);
+ if (status)
+ return status;
- for (i = 0; i < dev_cnt; i++) {
- vendor_dev_id = profile->device_table[i].vendor_dev_id;
- if ((vendor_dev_id >> 16) == PCI_VENDOR_ID_INTEL)
- if (hw->device_id == (vendor_dev_id & 0xFFFF))
+ I40E_SECTION_TABLE(profile, sec_tbl);
+
+ for (i = 0; i < sec_tbl->section_count; i++) {
+ sec_off = sec_tbl->section_offset[i];
+ sec = I40E_SECTION_HEADER(profile, sec_off);
+ /* Process generic admin command */
+ if (sec->section.type == SECTION_TYPE_AQ) {
+ ddp_aq = (struct i40e_profile_aq_section *)&sec[1];
+ status = i40e_ddp_exec_aq_section(hw, ddp_aq);
+ if (status) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE,
+ "Failed to execute aq: section %d, opcode %u\n",
+ i, ddp_aq->opcode);
break;
+ }
+ sec->section.type = SECTION_TYPE_RB_AQ;
+ }
+
+ /* Skip any non-mmio sections */
+ if (sec->section.type != SECTION_TYPE_MMIO)
+ continue;
+
+ section_size = sec->section.size +
+ sizeof(struct i40e_profile_section_header);
+
+ /* Write MMIO section */
+ status = i40e_aq_write_ddp(hw, (void *)sec, (u16)section_size,
+ track_id, &offset, &info, NULL);
+ if (status) {
+ i40e_debug(hw, I40E_DEBUG_PACKAGE,
+ "Failed to write profile: section %d, offset %d, info %d\n",
+ i, offset, info);
+ break;
+ }
}
- if (i == dev_cnt) {
- i40e_debug(hw, I40E_DEBUG_PACKAGE, "Device doesn't support DDP");
- return I40E_ERR_DEVICE_NOT_SUPPORTED;
- }
+ return status;
+}
+
+/**
+ * i40e_rollback_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be removed
+ * @track_id: package tracking id
+ *
+ * Rolls back previously loaded package.
+ */
+enum i40e_status_code
+i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u32 track_id)
+{
+ struct i40e_profile_section_header *sec = NULL;
+ i40e_status status = 0;
+ struct i40e_section_table *sec_tbl;
+ u32 offset = 0, info = 0;
+ u32 section_size = 0;
+ u32 sec_off;
+ int i;
+
+ status = i40e_validate_profile(hw, profile, track_id, true);
+ if (status)
+ return status;
- nvm = (u32 *)&profile->device_table[dev_cnt];
- sec_tbl = (struct i40e_section_table *)&nvm[nvm[0] + 1];
+ I40E_SECTION_TABLE(profile, sec_tbl);
- for (i = 0; i < sec_tbl->section_count; i++) {
- sec = (struct i40e_profile_section_header *)((u8 *)profile +
- sec_tbl->section_offset[i]);
+ /* For rollback write sections in reverse */
+ for (i = sec_tbl->section_count - 1; i >= 0; i--) {
+ sec_off = sec_tbl->section_offset[i];
+ sec = I40E_SECTION_HEADER(profile, sec_off);
- /* Skip 'AQ', 'note' and 'name' sections */
- if (sec->section.type != SECTION_TYPE_MMIO)
+ /* Skip any non-rollback sections */
+ if (sec->section.type != SECTION_TYPE_RB_MMIO)
continue;
section_size = sec->section.size +
sizeof(struct i40e_profile_section_header);
- /* Write profile */
+ /* Write roll-back MMIO section */
status = i40e_aq_write_ddp(hw, (void *)sec, (u16)section_size,
track_id, &offset, &info, NULL);
if (status) {
i40e_debug(hw, I40E_DEBUG_PACKAGE,
- "Failed to write profile: offset %d, info %d",
- offset, info);
+ "Failed to write profile: section %d, offset %d, info %d\n",
+ i, offset, info);
break;
}
}
/**
* i40e_init_dcb
* @hw: pointer to the hw struct
+ * @enable_mib_change: enable mib change event
*
* Update DCB configuration from the Firmware
**/
-i40e_status i40e_init_dcb(struct i40e_hw *hw)
+i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
{
i40e_status ret = 0;
struct i40e_lldp_variables lldp_cfg;
u8 adminstatus = 0;
if (!hw->func_caps.dcb)
- return ret;
+ return I40E_NOT_SUPPORTED;
/* Read LLDP NVM area */
ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
if (ret)
- return ret;
+ return I40E_ERR_NOT_READY;
/* Get the LLDP AdminStatus for the current port */
adminstatus = lldp_cfg.adminstatus >> (hw->port * 4);
/* LLDP agent disabled */
if (!adminstatus) {
hw->dcbx_status = I40E_DCBX_STATUS_DISABLED;
- return ret;
+ return I40E_ERR_NOT_READY;
}
/* Get DCBX status */
return ret;
/* Check the DCBX Status */
- switch (hw->dcbx_status) {
- case I40E_DCBX_STATUS_DONE:
- case I40E_DCBX_STATUS_IN_PROGRESS:
+ if (hw->dcbx_status == I40E_DCBX_STATUS_DONE ||
+ hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) {
/* Get current DCBX configuration */
ret = i40e_get_dcb_config(hw);
if (ret)
return ret;
- break;
- case I40E_DCBX_STATUS_DISABLED:
- return ret;
- case I40E_DCBX_STATUS_NOT_STARTED:
- case I40E_DCBX_STATUS_MULTIPLE_PEERS:
- default:
- break;
+ } else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
+ return I40E_ERR_NOT_READY;
}
/* Configure the LLDP MIB change event */
- ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
- if (ret)
- return ret;
+ if (enable_mib_change)
+ ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
return ret;
}
u8 bridgetype,
struct i40e_dcbx_config *dcbcfg);
i40e_status i40e_get_dcb_config(struct i40e_hw *hw);
-i40e_status i40e_init_dcb(struct i40e_hw *hw);
+i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change);
#endif /* _I40E_DCB_H_ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+
+#include <linux/firmware.h>
+
+/**
+ * i40e_ddp_profiles_eq - checks if DDP profiles are equivalent
+ * @a: new profile info
+ * @b: old profile info
+ *
+ * Two profiles are considered equivalent when their track_id, version and
+ * name (I40E_DDP_NAME_SIZE bytes) all match; no other field is compared.
+ * Returns true if profiles are the same.
+ **/
+static bool i40e_ddp_profiles_eq(struct i40e_profile_info *a,
+ struct i40e_profile_info *b)
+{
+ return a->track_id == b->track_id &&
+ !memcmp(&a->version, &b->version, sizeof(a->version)) &&
+ !memcmp(&a->name, &b->name, I40E_DDP_NAME_SIZE);
+}
+
+/**
+ * i40e_ddp_does_profile_exist - checks if DDP profile loaded already
+ * @hw: HW data structure
+ * @pinfo: DDP profile information structure
+ *
+ * Fetches the list of loaded profiles with i40e_aq_get_ddp_list() and looks
+ * for an entry equal to @pinfo according to i40e_ddp_profiles_eq().
+ *
+ * Returns >0 if the profile exists.
+ * Returns 0 if the profile is absent.
+ * Returns -EIO if the list of loaded profiles could not be read.
+ **/
+static int i40e_ddp_does_profile_exist(struct i40e_hw *hw,
+				       struct i40e_profile_info *pinfo)
+{
+	struct i40e_ddp_profile_list *profile_list;
+	u8 buff[I40E_PROFILE_LIST_SIZE];
+	i40e_status status;
+	int i;
+
+	status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+				      NULL);
+	/* Report a real errno: callers hand this value back to user space */
+	if (status)
+		return -EIO;
+
+	profile_list = (struct i40e_ddp_profile_list *)buff;
+	for (i = 0; i < profile_list->p_count; i++) {
+		if (i40e_ddp_profiles_eq(pinfo, &profile_list->p_info[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * i40e_ddp_profiles_overlap - checks if DDP profiles overlap.
+ * @new: new profile info
+ * @old: old profile info
+ *
+ * The decision is based only on the group id, taken from bits 16..23 of
+ * each profile's track_id:
+ *  - a new profile in group 0x00 always overlaps;
+ *  - otherwise, if either profile is in group 0xFF they do not overlap;
+ *  - otherwise the profiles overlap when their group ids differ.
+ *
+ * Returns true if the profiles overlap.
+ **/
+static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new,
+ struct i40e_profile_info *old)
+{
+ unsigned int group_id_old = (u8)((old->track_id & 0x00FF0000) >> 16);
+ unsigned int group_id_new = (u8)((new->track_id & 0x00FF0000) >> 16);
+
+ /* 0x00 group must be only the first */
+ if (group_id_new == 0)
+ return true;
+ /* 0xFF group is compatible with anything else */
+ if (group_id_new == 0xFF || group_id_old == 0xFF)
+ return false;
+ /* otherwise only profiles from the same group are compatible */
+ return group_id_old != group_id_new;
+}
+
+/**
+ * i40e_ddp_does_profile_overlap - checks if DDP overlaps with existing one.
+ * @hw: HW data structure
+ * @pinfo: DDP profile information structure
+ *
+ * Fetches the list of loaded profiles with i40e_aq_get_ddp_list() and tests
+ * @pinfo against each entry with i40e_ddp_profiles_overlap().
+ *
+ * Returns >0 if the profile overlaps.
+ * Returns 0 if the profile is ok.
+ * Returns -EIO if the list of loaded profiles could not be read.
+ **/
+static int i40e_ddp_does_profile_overlap(struct i40e_hw *hw,
+ struct i40e_profile_info *pinfo)
+{
+ struct i40e_ddp_profile_list *profile_list;
+ u8 buff[I40E_PROFILE_LIST_SIZE];
+ i40e_status status;
+ int i;
+
+ status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+ NULL);
+ if (status)
+ return -EIO;
+
+ profile_list = (struct i40e_ddp_profile_list *)buff;
+ for (i = 0; i < profile_list->p_count; i++) {
+ if (i40e_ddp_profiles_overlap(pinfo,
+ &profile_list->p_info[i]))
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * i40e_add_pinfo
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package
+ * @profile_info_sec: buffer for information section
+ * @track_id: package tracking id
+ *
+ * Register a profile to the list of loaded profiles.
+ *
+ * @profile_info_sec is filled in by this function and must be large enough
+ * to hold a struct i40e_profile_section_header immediately followed by a
+ * struct i40e_profile_info. The info record is built from @track_id and the
+ * version and name of @profile with the operation I40E_DDP_ADD_TRACKID, and
+ * is then written with i40e_aq_write_ddp().
+ *
+ * Returns the status reported by i40e_aq_write_ddp().
+ */
+static enum i40e_status_code
+i40e_add_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u8 *profile_info_sec, u32 track_id)
+{
+ struct i40e_profile_section_header *sec;
+ struct i40e_profile_info *pinfo;
+ i40e_status status;
+ u32 offset = 0, info = 0;
+
+ sec = (struct i40e_profile_section_header *)profile_info_sec;
+ sec->tbl_size = 1;
+ sec->data_end = sizeof(struct i40e_profile_section_header) +
+ sizeof(struct i40e_profile_info);
+ sec->section.type = SECTION_TYPE_INFO;
+ sec->section.offset = sizeof(struct i40e_profile_section_header);
+ sec->section.size = sizeof(struct i40e_profile_info);
+ pinfo = (struct i40e_profile_info *)(profile_info_sec +
+ sec->section.offset);
+ pinfo->track_id = track_id;
+ pinfo->version = profile->version;
+ pinfo->op = I40E_DDP_ADD_TRACKID;
+
+ /* Clear reserved field */
+ memset(pinfo->reserved, 0, sizeof(pinfo->reserved));
+ memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+ status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+ track_id, &offset, &info, NULL);
+ return status;
+}
+
+/**
+ * i40e_del_pinfo - delete DDP profile info from NIC
+ * @hw: HW data structure
+ * @profile: DDP profile segment to be deleted
+ * @profile_info_sec: DDP profile section header
+ * @track_id: track ID of the profile for deletion
+ *
+ * Removes DDP profile from the NIC.
+ *
+ * @profile_info_sec is filled in by this function and must be large enough
+ * to hold a struct i40e_profile_section_header immediately followed by a
+ * struct i40e_profile_info. The info record is built from @track_id and the
+ * version and name of @profile with the operation I40E_DDP_REMOVE_TRACKID,
+ * and is then written with i40e_aq_write_ddp().
+ *
+ * Returns the status reported by i40e_aq_write_ddp().
+ **/
+static enum i40e_status_code
+i40e_del_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u8 *profile_info_sec, u32 track_id)
+{
+ struct i40e_profile_section_header *sec;
+ struct i40e_profile_info *pinfo;
+ i40e_status status;
+ u32 offset = 0, info = 0;
+
+ sec = (struct i40e_profile_section_header *)profile_info_sec;
+ sec->tbl_size = 1;
+ sec->data_end = sizeof(struct i40e_profile_section_header) +
+ sizeof(struct i40e_profile_info);
+ sec->section.type = SECTION_TYPE_INFO;
+ sec->section.offset = sizeof(struct i40e_profile_section_header);
+ sec->section.size = sizeof(struct i40e_profile_info);
+ pinfo = (struct i40e_profile_info *)(profile_info_sec +
+ sec->section.offset);
+ pinfo->track_id = track_id;
+ pinfo->version = profile->version;
+ pinfo->op = I40E_DDP_REMOVE_TRACKID;
+
+ /* Clear reserved field */
+ memset(pinfo->reserved, 0, sizeof(pinfo->reserved));
+ memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+ status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+ track_id, &offset, &info, NULL);
+ return status;
+}
+
+/**
+ * i40e_ddp_is_pkg_hdr_valid - performs basic pkg header integrity checks
+ * @netdev: net device structure (for logging purposes)
+ * @pkg_hdr: pointer to package header
+ * @size_huge: size of the whole DDP profile package in size_t
+ *
+ * Checks correctness of pkg header: Version, size too big/small, and
+ * all segment offsets alignment and boundaries. This function lets
+ * reject non DDP profile file to be loaded by administrator mistake.
+ *
+ * The size checks run before any header field is read, so a truncated file
+ * is never dereferenced past its end. The header size derived from the
+ * file-supplied segment_count is computed in 64 bits so that a huge count
+ * cannot wrap around and slip past the bound check.
+ *
+ * Returns true if the header passed all checks, false otherwise.
+ **/
+static bool i40e_ddp_is_pkg_hdr_valid(struct net_device *netdev,
+				      struct i40e_package_header *pkg_hdr,
+				      size_t size_huge)
+{
+	u32 size = 0xFFFFFFFFU & size_huge;
+	u64 pkg_hdr_size;
+	u32 segment;
+
+	if (!pkg_hdr)
+		return false;
+
+	if (size_huge > size) {
+		netdev_err(netdev, "Invalid DDP profile - size is bigger than 4G");
+		return false;
+	}
+	if (size < (sizeof(struct i40e_package_header) +
+		sizeof(struct i40e_metadata_segment) + sizeof(u32) * 2)) {
+		netdev_err(netdev, "Invalid DDP profile - size is too small.");
+		return false;
+	}
+
+	/* The buffer is now known to cover the fixed part of the header */
+	if (pkg_hdr->version.major > 0) {
+		struct i40e_ddp_version ver = pkg_hdr->version;
+
+		netdev_err(netdev, "Unsupported DDP profile version %u.%u.%u.%u",
+			   ver.major, ver.minor, ver.update, ver.draft);
+		return false;
+	}
+
+	/* Widen before the arithmetic: segment_count is untrusted file data */
+	pkg_hdr_size = sizeof(u32) * ((u64)pkg_hdr->segment_count + 2U);
+	if (size < pkg_hdr_size) {
+		netdev_err(netdev, "Invalid DDP profile - too many segments");
+		return false;
+	}
+	for (segment = 0; segment < pkg_hdr->segment_count; ++segment) {
+		u32 offset = pkg_hdr->segment_offset[segment];
+
+		if (0xFU & offset) {
+			netdev_err(netdev,
+				   "Invalid DDP profile %u segment alignment",
+				   segment);
+			return false;
+		}
+		if (pkg_hdr_size > offset || offset >= size) {
+			netdev_err(netdev,
+				   "Invalid DDP profile %u segment offset",
+				   segment);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * i40e_ddp_load - performs DDP loading
+ * @netdev: net device structure
+ * @data: buffer containing recipe file
+ * @size: size of the buffer
+ * @is_add: true when loading profile, false when rolling back the previous one
+ *
+ * Checks correctness and loads DDP profile to the NIC. The function is
+ * also used for rolling back previously loaded profile.
+ *
+ * The steps are: validate the package header, locate the metadata and i40e
+ * profile segments, compare the profile against the list of profiles already
+ * loaded, write (or roll back) the profile, and finally add (or remove) the
+ * profile info record in the firmware's profile list.
+ *
+ * Returns 0 on success. Returns -EINVAL when the package header is invalid,
+ * a required segment is missing, the profile is already loaded or overlaps
+ * an existing one (add), or the profile is not loaded (remove). Returns the
+ * negative value reported by i40e_ddp_does_profile_exist() or
+ * i40e_ddp_does_profile_overlap() when the loaded-profile list cannot be
+ * fetched. Returns -EPERM when i40e_write_profile() reports
+ * I40E_ERR_DEVICE_NOT_SUPPORTED, and -EIO for any other failure to write,
+ * roll back or register/unregister the profile.
+ *
+ * NOTE(review): @data is const but is cast to a non-const header pointer,
+ * and i40e_write_profile() appears to rewrite AQ section types inside the
+ * profile - confirm that callers always pass a writable buffer.
+ **/
+int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+ bool is_add)
+{
+ u8 profile_info_sec[sizeof(struct i40e_profile_section_header) +
+ sizeof(struct i40e_profile_info)];
+ struct i40e_metadata_segment *metadata_hdr;
+ struct i40e_profile_segment *profile_hdr;
+ struct i40e_profile_info pinfo;
+ struct i40e_package_header *pkg_hdr;
+ i40e_status status;
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ u32 track_id;
+ int istatus;
+
+ pkg_hdr = (struct i40e_package_header *)data;
+ if (!i40e_ddp_is_pkg_hdr_valid(netdev, pkg_hdr, size))
+ return -EINVAL;
+
+ if (size < (sizeof(struct i40e_package_header) +
+ sizeof(struct i40e_metadata_segment) + sizeof(u32) * 2)) {
+ netdev_err(netdev, "Invalid DDP recipe size.");
+ return -EINVAL;
+ }
+
+ /* Find beginning of segment data in buffer */
+ metadata_hdr = (struct i40e_metadata_segment *)
+ i40e_find_segment_in_package(SEGMENT_TYPE_METADATA, pkg_hdr);
+ if (!metadata_hdr) {
+ netdev_err(netdev, "Failed to find metadata segment in DDP recipe.");
+ return -EINVAL;
+ }
+
+ track_id = metadata_hdr->track_id;
+ profile_hdr = (struct i40e_profile_segment *)
+ i40e_find_segment_in_package(SEGMENT_TYPE_I40E, pkg_hdr);
+ if (!profile_hdr) {
+ netdev_err(netdev, "Failed to find profile segment in DDP recipe.");
+ return -EINVAL;
+ }
+
+ pinfo.track_id = track_id;
+ pinfo.version = profile_hdr->version;
+ if (is_add)
+ pinfo.op = I40E_DDP_ADD_TRACKID;
+ else
+ pinfo.op = I40E_DDP_REMOVE_TRACKID;
+
+ memcpy(pinfo.name, profile_hdr->name, I40E_DDP_NAME_SIZE);
+
+ /* Check if profile data already exists */
+ istatus = i40e_ddp_does_profile_exist(&pf->hw, &pinfo);
+ if (istatus < 0) {
+ netdev_err(netdev, "Failed to fetch loaded profiles.");
+ return istatus;
+ }
+ if (is_add) {
+ if (istatus > 0) {
+ netdev_err(netdev, "DDP profile already loaded.");
+ return -EINVAL;
+ }
+ istatus = i40e_ddp_does_profile_overlap(&pf->hw, &pinfo);
+ if (istatus < 0) {
+ netdev_err(netdev, "Failed to fetch loaded profiles.");
+ return istatus;
+ }
+ if (istatus > 0) {
+ netdev_err(netdev, "DDP profile overlaps with existing one.");
+ return -EINVAL;
+ }
+ } else {
+ if (istatus == 0) {
+ netdev_err(netdev,
+ "DDP profile for deletion does not exist.");
+ return -EINVAL;
+ }
+ }
+
+ /* Load profile data */
+ if (is_add) {
+ status = i40e_write_profile(&pf->hw, profile_hdr, track_id);
+ if (status) {
+ if (status == I40E_ERR_DEVICE_NOT_SUPPORTED) {
+ netdev_err(netdev,
+ "Profile is not supported by the device.");
+ return -EPERM;
+ }
+ netdev_err(netdev, "Failed to write DDP profile.");
+ return -EIO;
+ }
+ } else {
+ status = i40e_rollback_profile(&pf->hw, profile_hdr, track_id);
+ if (status) {
+ netdev_err(netdev, "Failed to remove DDP profile.");
+ return -EIO;
+ }
+ }
+
+ /* Add/remove profile to/from profile list in FW */
+ if (is_add) {
+ status = i40e_add_pinfo(&pf->hw, profile_hdr, profile_info_sec,
+ track_id);
+ if (status) {
+ netdev_err(netdev, "Failed to add DDP profile info.");
+ return -EIO;
+ }
+ } else {
+ status = i40e_del_pinfo(&pf->hw, profile_hdr, profile_info_sec,
+ track_id);
+ if (status) {
+ netdev_err(netdev, "Failed to restore DDP profile info.");
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * i40e_ddp_restore - restore previously loaded profile and remove from list
+ * @pf: PF data struct
+ *
+ * Restores previously loaded profile stored on the list in driver memory.
+ * After rolling back removes entry from the list.
+ *
+ * Only the first entry of pf->ddp_old_prof is processed. The entry is
+ * unlinked and freed whether or not the rollback succeeded.
+ *
+ * Returns the status of i40e_ddp_load() for that entry, or 0 when the list
+ * is empty.
+ **/
+static int i40e_ddp_restore(struct i40e_pf *pf)
+{
+ struct i40e_ddp_old_profile_list *entry;
+ struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev;
+ int status = 0;
+
+ if (!list_empty(&pf->ddp_old_prof)) {
+ entry = list_first_entry(&pf->ddp_old_prof,
+ struct i40e_ddp_old_profile_list,
+ list);
+ status = i40e_ddp_load(netdev, entry->old_ddp_buf,
+ entry->old_ddp_size, false);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ return status;
+}
+
+/**
+ * i40e_ddp_flash - callback function for ethtool flash feature
+ * @netdev: net device structure
+ * @flash: kernel flash structure
+ *
+ * Ethtool callback function used for loading and unloading DDP profiles.
+ *
+ * When flash->data is the string "-", the most recently saved profile is
+ * rolled back. Any other value is treated as a file name (at most
+ * I40E_DDP_PROFILE_NAME_MAX characters of it are used) that is appended to
+ * I40E_DDP_PROFILE_PATH, requested through request_firmware() and loaded.
+ * After a successful load a copy of the file is kept on pf->ddp_old_prof so
+ * that it can be rolled back later; if that copy cannot be allocated the
+ * load still counts as successful and only a message is logged.
+ *
+ * Returns 0 on success, -EINVAL when the region is not I40_DDP_FLASH_REGION
+ * or the PCI function is not 0, -ENOENT when a rollback is requested but no
+ * profile is saved, or the error reported by request_firmware(),
+ * i40e_ddp_load() or i40e_ddp_restore().
+ **/
+int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash)
+{
+ const struct firmware *ddp_config;
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ int status = 0;
+
+ /* Check for valid region first */
+ if (flash->region != I40_DDP_FLASH_REGION) {
+ netdev_err(netdev, "Requested firmware region is not recognized by this driver.");
+ return -EINVAL;
+ }
+ if (pf->hw.bus.func != 0) {
+ netdev_err(netdev, "Any DDP operation is allowed only on Phy0 NIC interface");
+ return -EINVAL;
+ }
+
+ /* If the user supplied "-" instead of file name rollback previously
+ * stored profile (the else branch below). Anything else is taken as
+ * the name of a profile file to load.
+ */
+ if (strncmp(flash->data, "-", 2) != 0) {
+ struct i40e_ddp_old_profile_list *list_entry;
+ char profile_name[sizeof(I40E_DDP_PROFILE_PATH)
+ + I40E_DDP_PROFILE_NAME_MAX];
+
+ profile_name[sizeof(profile_name) - 1] = 0;
+ strncpy(profile_name, I40E_DDP_PROFILE_PATH,
+ sizeof(profile_name) - 1);
+ strncat(profile_name, flash->data, I40E_DDP_PROFILE_NAME_MAX);
+ /* Load DDP recipe. */
+ status = request_firmware(&ddp_config, profile_name,
+ &netdev->dev);
+ if (status) {
+ netdev_err(netdev, "DDP recipe file request failed.");
+ return status;
+ }
+
+ status = i40e_ddp_load(netdev, ddp_config->data,
+ ddp_config->size, true);
+
+ if (!status) {
+ list_entry =
+ kzalloc(sizeof(struct i40e_ddp_old_profile_list) +
+ ddp_config->size, GFP_KERNEL);
+ if (!list_entry) {
+ netdev_info(netdev, "Failed to allocate memory for previous DDP profile data.");
+ netdev_info(netdev, "New profile loaded but roll-back will be impossible.");
+ } else {
+ memcpy(list_entry->old_ddp_buf,
+ ddp_config->data, ddp_config->size);
+ list_entry->old_ddp_size = ddp_config->size;
+ list_add(&list_entry->list, &pf->ddp_old_prof);
+ }
+ }
+
+ release_firmware(ddp_config);
+ } else {
+ if (!list_empty(&pf->ddp_old_prof)) {
+ status = i40e_ddp_restore(pf);
+ } else {
+ netdev_warn(netdev, "There is no DDP profile to restore.");
+ status = -ENOENT;
+ }
+ }
+ return status;
+}
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseT_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
+ if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseSR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseSR4_Full);
+ }
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseLR4_Full);
case I40E_PHY_TYPE_40GBASE_SR4:
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseSR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseSR4_Full);
break;
case I40E_PHY_TYPE_40GBASE_LR4:
ethtool_link_ksettings_add_link_mode(ks, supported,
.set_link_ksettings = i40e_set_link_ksettings,
.get_fecparam = i40e_get_fec_param,
.set_fecparam = i40e_set_fec_param,
+ .flash_device = i40e_ddp_flash,
};
void i40e_set_ethtool_ops(struct net_device *netdev)
fcnt = i40e_update_filter_state(num_add, list, add_head);
if (fcnt != num_add) {
- set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
- dev_warn(&vsi->back->pdev->dev,
- "Error %s adding RX filters on %s, promiscuous mode forced on\n",
- i40e_aq_str(hw, aq_err),
- vsi_name);
+ if (vsi->type == I40E_VSI_MAIN) {
+ set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+ dev_warn(&vsi->back->pdev->dev,
+ "Error %s adding RX filters on %s, promiscuous mode forced on\n",
+ i40e_aq_str(hw, aq_err), vsi_name);
+ } else if (vsi->type == I40E_VSI_SRIOV ||
+ vsi->type == I40E_VSI_VMDQ1 ||
+ vsi->type == I40E_VSI_VMDQ2) {
+ dev_warn(&vsi->back->pdev->dev,
+ "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n",
+ i40e_aq_str(hw, aq_err), vsi_name, vsi_name);
+ } else {
+ dev_warn(&vsi->back->pdev->dev,
+ "Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
+ i40e_aq_str(hw, aq_err), vsi_name, vsi->type);
+ }
}
}
struct i40e_vsi_context ctxt;
i40e_status ret;
+ /* Don't modify stripping options if a port VLAN is active */
+ if (vsi->info.pvid)
+ return;
+
if ((vsi->info.valid_sections &
cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0))
struct i40e_vsi_context ctxt;
i40e_status ret;
+ /* Don't modify stripping options if a port VLAN is active */
+ if (vsi->info.pvid)
+ return;
+
if ((vsi->info.valid_sections &
cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) ==
goto out;
/* Get the initial DCB configuration */
- err = i40e_init_dcb(hw);
+ err = i40e_init_dcb(hw, true);
if (!err) {
/* Device/Function is not DCBX capable */
if ((!hw->func_caps.dcb) ||
struct i40e_pf *pf = vsi->back;
u8 enabled_tc = 0, num_tc, hw;
bool need_reset = false;
+ int old_queue_pairs;
int ret = -EINVAL;
u16 mode;
int i;
+ old_queue_pairs = vsi->num_queue_pairs;
num_tc = mqprio_qopt->qopt.num_tc;
hw = mqprio_qopt->qopt.hw;
mode = mqprio_qopt->mode;
}
ret = i40e_configure_queue_channels(vsi);
if (ret) {
+ vsi->num_queue_pairs = old_queue_pairs;
netdev_info(netdev,
"Failed configuring queue channels\n");
need_reset = true;
dev_warn(&pf->pdev->dev,
"shutdown_lan_hmc failed: %d\n", ret);
}
+
+ /* Save the current PTP time so that we can restore the time after the
+ * reset completes.
+ */
+ i40e_ptp_save_hw_time(pf);
}
/**
INIT_LIST_HEAD(&pf->l3_flex_pit_list);
INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+ INIT_LIST_HEAD(&pf->ddp_old_prof);
/* set up the locks for the AQ, do this only once in probe
* and destroy them only once in remove
if (err) {
if (err == I40E_ERR_FIRMWARE_API_VERSION)
dev_info(&pdev->dev,
- "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+ "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
+ hw->aq.api_maj_ver,
+ hw->aq.api_min_ver,
+ I40E_FW_API_VERSION_MAJOR,
+ I40E_FW_MINOR_VERSION(hw));
else
dev_info(&pdev->dev,
"The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
dev_info(&pdev->dev,
- "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
+ "The driver for the device detected a newer version of the NVM image v%u.%u than expected v%u.%u. Please install the most recent version of the network driver.\n",
+ hw->aq.api_maj_ver,
+ hw->aq.api_min_ver,
+ I40E_FW_API_VERSION_MAJOR,
+ I40E_FW_MINOR_VERSION(hw));
else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
dev_info(&pdev->dev,
- "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+ "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
+ hw->aq.api_maj_ver,
+ hw->aq.api_min_ver,
+ I40E_FW_API_VERSION_MAJOR,
+ I40E_FW_MINOR_VERSION(hw));
i40e_verify_eeprom(pf);
struct i40e_generic_seg_header *
i40e_find_segment_in_package(u32 segment_type,
struct i40e_package_header *pkg_header);
+struct i40e_profile_section_header *
+i40e_find_section_in_profile(u32 section_type,
+ struct i40e_profile_segment *profile);
enum i40e_status_code
i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
u32 track_id);
enum i40e_status_code
+i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
+ u32 track_id);
+enum i40e_status_code
i40e_add_pinfo_to_list(struct i40e_hw *hw,
struct i40e_profile_segment *profile,
u8 *profile_info_sec, u32 track_id);
pf->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
pf->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+ /* Set the previous "reset" time to the current Kernel clock time */
+ pf->ptp_prev_hw_time = ktime_to_timespec64(ktime_get_real());
+ pf->ptp_reset_start = ktime_get();
+
return 0;
}
+/**
+ * i40e_ptp_save_hw_time - Save the current PTP time as ptp_prev_hw_time
+ * @pf: Board private structure
+ *
+ * Read the current PTP time and save it into pf->ptp_prev_hw_time. This should
+ * be called at the end of preparing to reset, just before hardware reset
+ * occurs, in order to preserve the PTP time as close as possible across
+ * resets.
+ *
+ * A monotonic timestamp is recorded in pf->ptp_reset_start at the same time.
+ * Neither field is touched when I40E_FLAG_PTP is not set in pf->flags.
+ */
+void i40e_ptp_save_hw_time(struct i40e_pf *pf)
+{
+ /* don't try to access the PTP clock if it's not enabled */
+ if (!(pf->flags & I40E_FLAG_PTP))
+ return;
+
+ i40e_ptp_gettimex(&pf->ptp_caps, &pf->ptp_prev_hw_time, NULL);
+ /* Get a monotonic starting time for this reset */
+ pf->ptp_reset_start = ktime_get();
+}
+
+/**
+ * i40e_ptp_restore_hw_time - Restore the ptp_prev_hw_time + delta to PTP regs
+ * @pf: Board private structure
+ *
+ * Restore the PTP hardware clock registers. We previously cached the PTP
+ * hardware time as pf->ptp_prev_hw_time. To be as accurate as possible,
+ * update this value based on the time delta since the time was saved, using
+ * CLOCK_MONOTONIC (via ktime_get()) to calculate the time difference.
+ *
+ * This ensures that the hardware clock is restored to nearly what it should
+ * have been if a reset had not occurred.
+ *
+ * Note that pf->ptp_prev_hw_time itself is advanced by the elapsed delta
+ * before it is written to the hardware; pf->ptp_reset_start is not updated
+ * here.
+ */
+void i40e_ptp_restore_hw_time(struct i40e_pf *pf)
+{
+ ktime_t delta = ktime_sub(ktime_get(), pf->ptp_reset_start);
+
+ /* Update the previous HW time with the ktime delta */
+ timespec64_add_ns(&pf->ptp_prev_hw_time, ktime_to_ns(delta));
+
+ /* Restore the hardware clock registers */
+ i40e_ptp_settime(&pf->ptp_caps, &pf->ptp_prev_hw_time);
+}
+
/**
* i40e_ptp_init - Initialize the 1588 support after device probe or reset
* @pf: Board private structure
* This function sets device up for 1588 support. The first time it is run, it
* will create a PHC clock device. It does not create a clock device if one
* already exists. It also reconfigures the device after a reset.
+ *
+ * The first time a clock is created, i40e_ptp_create_clock will set
+ * pf->ptp_prev_hw_time to the current system time. During resets, it is
+ * expected that this timespec will be set to the last known PTP clock time,
+ * in order to preserve the clock time as close as possible across a reset.
**/
void i40e_ptp_init(struct i40e_pf *pf)
{
dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n",
__func__);
} else if (pf->ptp_clock) {
- struct timespec64 ts;
u32 regval;
if (pf->hw.debug_mask & I40E_DEBUG_LAN)
/* reset timestamping mode */
i40e_ptp_set_timestamp_mode(pf, &pf->tstamp_config);
- /* Set the clock value. */
- ts = ktime_to_timespec64(ktime_get_real());
- i40e_ptp_settime(&pf->ptp_caps, &ts);
+ /* Restore the clock time based on last known value */
+ i40e_ptp_restore_hw_time(pf);
}
}
struct i40e_metadata_segment {
struct i40e_generic_seg_header header;
struct i40e_ddp_version version;
+#define I40E_DDP_TRACKID_RDONLY 0
+#define I40E_DDP_TRACKID_INVALID 0xFFFFFFFF
u32 track_id;
char name[I40E_DDP_NAME_SIZE];
};
struct {
#define SECTION_TYPE_INFO 0x00000010
#define SECTION_TYPE_MMIO 0x00000800
+#define SECTION_TYPE_RB_MMIO 0x00001800
#define SECTION_TYPE_AQ 0x00000801
+#define SECTION_TYPE_RB_AQ 0x00001801
#define SECTION_TYPE_NOTE 0x80000000
#define SECTION_TYPE_NAME 0x80000001
+#define SECTION_TYPE_PROTO 0x80000002
+#define SECTION_TYPE_PCTYPE 0x80000003
+#define SECTION_TYPE_PTYPE 0x80000004
u32 type;
u32 offset;
u32 size;
} section;
};
+struct i40e_profile_tlv_section_record {
+ u8 rtype;
+ u8 type;
+ u16 len;
+ u8 data[12];
+};
+
+/* Generic AQ section in profile
+ * NOTE(review): data[1] is presumably the first byte of a variable-length
+ * payload whose size is given by datalen - confirm against the code that
+ * parses this section.
+ */
+struct i40e_profile_aq_section {
+ u16 opcode;
+ u16 flags;
+ u8 param[16];
+ u16 datalen;
+ u8 data[1];
+};
+
struct i40e_profile_info {
u32 track_id;
struct i40e_ddp_version version;
(u8 *)&stats, sizeof(stats));
}
-/* If the VF is not trusted restrict the number of MAC/VLAN it can program */
-#define I40E_VC_MAX_MAC_ADDR_PER_VF 12
+/* If the VF is not trusted restrict the number of MAC/VLAN it can program
+ * MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast
+ */
+#define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1)
#define I40E_VC_MAX_VLAN_PER_VF 8
/**
#define I40E_FW_API_VERSION_MAJOR 0x0001
#define I40E_FW_API_VERSION_MINOR_X722 0x0005
-#define I40E_FW_API_VERSION_MINOR_X710 0x0007
+#define I40E_FW_API_VERSION_MINOR_X710 0x0008
#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
I40E_FW_API_VERSION_MINOR_X710 : \
ice_txrx.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
#include "ice_devids.h"
#include "ice_type.h"
#include "ice_txrx.h"
+#include "ice_dcb.h"
#include "ice_switch.h"
#include "ice_common.h"
#include "ice_sched.h"
struct ice_tc_cfg {
u8 numtc; /* Total number of enabled TCs */
- u8 ena_tc; /* TX map */
+ u8 ena_tc; /* Tx map */
struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
};
};
struct ice_qs_cfg {
- struct mutex *qs_mutex; /* will be assgined to &pf->avail_q_mutex */
+ struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */
unsigned long *pf_map;
unsigned long pf_map_size;
unsigned int q_count;
ICE_FLAG_RSS_ENA,
ICE_FLAG_SRIOV_ENA,
ICE_FLAG_SRIOV_CAPABLE,
+ ICE_FLAG_DCB_CAPABLE,
+ ICE_FLAG_DCB_ENA,
ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+ ICE_FLAG_DISABLE_FW_LLDP,
+ ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
ICE_PF_FLAGS_NBITS /* must be last */
};
u32 hw_oicr_idx; /* Other interrupt cause vector HW index */
u32 num_avail_hw_msix; /* remaining HW MSIX vectors left unclaimed */
u32 num_lan_msix; /* Total MSIX vectors for base driver */
- u16 num_lan_tx; /* num lan Tx queues setup */
- u16 num_lan_rx; /* num lan Rx queues setup */
+ u16 num_lan_tx; /* num LAN Tx queues setup */
+ u16 num_lan_rx; /* num LAN Rx queues setup */
u16 q_left_tx; /* remaining num Tx queues left unclaimed */
u16 q_left_rx; /* remaining num Rx queues left unclaimed */
u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
struct ice_hw_port_stats stats_prev;
struct ice_hw hw;
u8 stat_prev_loaded; /* has previous stats been loaded */
+#ifdef CONFIG_DCB
+ u16 dcbx_cap;
+#endif /* CONFIG_DCB */
u32 tx_timeout_count;
unsigned long tx_timeout_last_recovery;
u32 tx_timeout_recovery_level;
/**
* ice_irq_dynamic_ena - Enable default interrupt generation settings
- * @hw: pointer to hw struct
- * @vsi: pointer to vsi struct, can be NULL
+ * @hw: pointer to HW struct
+ * @vsi: pointer to VSI struct, can be NULL
* @q_vector: pointer to q_vector, can be NULL
*/
static inline void
wr32(hw, GLINT_DYN_CTL(vector), val);
}
-static inline void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
-{
- vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
- vsi->tc_cfg.numtc = 1;
-}
-
void ice_set_ethtool_ops(struct net_device *netdev);
int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
void ice_napi_del(struct ice_vsi *vsi);
+#ifdef CONFIG_DCB
+int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
+void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
+#endif /* CONFIG_DCB */
#endif /* _ICE_H_ */
#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000
#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000
#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000
- /* For SDP: pin id of the SDP */
+ /* For SDP: pin ID of the SDP */
__le32 res_number;
/* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
__le16 status;
__le32 teid[1];
};
+/* Query Port ETS (indirect 0x040E)
+ *
+ * This indirect command is used to query port TC node configuration.
+ */
+struct ice_aqc_query_port_ets {
+ __le32 port_teid;
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_port_ets_elem {
+ u8 tc_valid_bits;
+ u8 reserved[3];
+ /* 3 bits for UP per TC 0-7, 4th byte reserved */
+ __le32 up2tc;
+ u8 tc_bw_share[8];
+ __le32 port_eir_prof_id;
+ __le32 port_cir_prof_id;
+ /* 3 bits per Node priority to TC 0-7, 4th byte reserved */
+ __le32 tc_node_prio;
+#define ICE_TC_NODE_PRIO_S 0x4
+ u8 reserved1[4];
+ __le32 tc_node_teid[8]; /* Used for response, reserved in command */
+};
+
/* Query Scheduler Resource Allocation (indirect 0x0412)
* This indirect command retrieves the scheduler resources allocated by
* EMP Firmware to the given PF.
u8 ext_info;
#define ICE_AQ_LINK_PHY_TEMP_ALARM BIT(0)
#define ICE_AQ_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */
- /* Port TX Suspended */
+ /* Port Tx Suspended */
#define ICE_AQ_LINK_TX_S 2
#define ICE_AQ_LINK_TX_M (0x03 << ICE_AQ_LINK_TX_S)
#define ICE_AQ_LINK_TX_ACTIVE 0
};
/**
- * Send to PF command (indirect 0x0801) id is only used by PF
+ * Send to PF command (indirect 0x0801) ID is only used by PF
*
- * Send to VF command (indirect 0x0802) id is only used by PF
+ * Send to VF command (indirect 0x0802) ID is only used by PF
*
*/
struct ice_aqc_pf_vf_msg {
__le32 addr_low;
};
+/* Get LLDP MIB (indirect 0x0A00)
+ * Note: This is also used by the LLDP MIB Change Event (0x0A01)
+ * as the format is the same.
+ *
+ * Per the _S/_M macros below, the MIB type field has shift 0 and mask 0x3,
+ * the bridge type field has shift 2 and mask 0x3, and the Tx state field has
+ * shift 4 and mask 0x3.
+ */
+struct ice_aqc_lldp_get_mib {
+ u8 type;
+#define ICE_AQ_LLDP_MIB_TYPE_S 0
+#define ICE_AQ_LLDP_MIB_TYPE_M (0x3 << ICE_AQ_LLDP_MIB_TYPE_S)
+#define ICE_AQ_LLDP_MIB_LOCAL 0
+#define ICE_AQ_LLDP_MIB_REMOTE 1
+#define ICE_AQ_LLDP_MIB_LOCAL_AND_REMOTE 2
+#define ICE_AQ_LLDP_BRID_TYPE_S 2
+#define ICE_AQ_LLDP_BRID_TYPE_M (0x3 << ICE_AQ_LLDP_BRID_TYPE_S)
+#define ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID 0
+#define ICE_AQ_LLDP_BRID_TYPE_NON_TPMR 1
+/* Tx pause flags in the 0x0A01 event use ICE_AQ_LLDP_TX_* */
+#define ICE_AQ_LLDP_TX_S 0x4
+#define ICE_AQ_LLDP_TX_M (0x03 << ICE_AQ_LLDP_TX_S)
+#define ICE_AQ_LLDP_TX_ACTIVE 0
+#define ICE_AQ_LLDP_TX_SUSPENDED 1
+#define ICE_AQ_LLDP_TX_FLUSHED 3
+/* The following bytes are reserved in the Get LLDP MIB command (0x0A00)
+ * and in the LLDP MIB Change Event (0x0A01); they carry valid data only in
+ * the Get LLDP MIB (0x0A00) response.
+ */
+ u8 reserved1;
+ __le16 local_len;
+ __le16 remote_len;
+ u8 reserved2[2];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Configure LLDP MIB Change Event (direct 0x0A01) */
+/* For MIB Change Event use ice_aqc_lldp_get_mib structure above */
+struct ice_aqc_lldp_set_mib_change {
+ u8 command;
+#define ICE_AQ_LLDP_MIB_UPDATE_ENABLE 0x0
+#define ICE_AQ_LLDP_MIB_UPDATE_DIS 0x1
+ u8 reserved[15];
+};
+
+/* Stop LLDP (direct 0x0A05) */
+struct ice_aqc_lldp_stop {
+ u8 command;
+#define ICE_AQ_LLDP_AGENT_STATE_MASK BIT(0)
+#define ICE_AQ_LLDP_AGENT_STOP 0x0
+#define ICE_AQ_LLDP_AGENT_SHUTDOWN ICE_AQ_LLDP_AGENT_STATE_MASK
+#define ICE_AQ_LLDP_AGENT_PERSIST_DIS BIT(1)
+ u8 reserved[15];
+};
+
+/* Start LLDP (direct 0x0A06) */
+struct ice_aqc_lldp_start {
+ u8 command;
+#define ICE_AQ_LLDP_AGENT_START BIT(0)
+#define ICE_AQ_LLDP_AGENT_PERSIST_ENA BIT(1)
+ u8 reserved[15];
+};
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * The command uses the generic descriptor struct and
+ * returns the struct below as an indirect response.
+ */
+struct ice_aqc_get_cee_dcb_cfg_resp {
+ u8 oper_num_tc;
+ u8 oper_prio_tc[4];
+ u8 oper_tc_bw[8];
+ u8 oper_pfc_en;
+ __le16 oper_app_prio;
+#define ICE_AQC_CEE_APP_FCOE_S 0
+#define ICE_AQC_CEE_APP_FCOE_M (0x7 << ICE_AQC_CEE_APP_FCOE_S)
+#define ICE_AQC_CEE_APP_ISCSI_S 3
+#define ICE_AQC_CEE_APP_ISCSI_M (0x7 << ICE_AQC_CEE_APP_ISCSI_S)
+#define ICE_AQC_CEE_APP_FIP_S 8
+#define ICE_AQC_CEE_APP_FIP_M (0x7 << ICE_AQC_CEE_APP_FIP_S)
+ __le32 tlv_status;
+#define ICE_AQC_CEE_PG_STATUS_S 0
+#define ICE_AQC_CEE_PG_STATUS_M (0x7 << ICE_AQC_CEE_PG_STATUS_S)
+#define ICE_AQC_CEE_PFC_STATUS_S 3
+#define ICE_AQC_CEE_PFC_STATUS_M (0x7 << ICE_AQC_CEE_PFC_STATUS_S)
+#define ICE_AQC_CEE_FCOE_STATUS_S 8
+#define ICE_AQC_CEE_FCOE_STATUS_M (0x7 << ICE_AQC_CEE_FCOE_STATUS_S)
+#define ICE_AQC_CEE_ISCSI_STATUS_S 11
+#define ICE_AQC_CEE_ISCSI_STATUS_M (0x7 << ICE_AQC_CEE_ISCSI_STATUS_S)
+#define ICE_AQC_CEE_FIP_STATUS_S 16
+#define ICE_AQC_CEE_FIP_STATUS_M (0x7 << ICE_AQC_CEE_FIP_STATUS_S)
+ u8 reserved[12];
+};
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ */
+struct ice_aqc_lldp_set_local_mib {
+ u8 type;
+#define SET_LOCAL_MIB_TYPE_DCBX_M BIT(0)
+#define SET_LOCAL_MIB_TYPE_LOCAL_MIB 0
+#define SET_LOCAL_MIB_TYPE_CEE_M BIT(1)
+#define SET_LOCAL_MIB_TYPE_CEE_WILLING 0
+#define SET_LOCAL_MIB_TYPE_CEE_NON_WILLING SET_LOCAL_MIB_TYPE_CEE_M
+ u8 reserved0;
+ __le16 length;
+ u8 reserved1[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting specific LLDP agent. e.g. DCBx.
+ * The same structure is used for the response, with the command field
+ * being used as the status field.
+ */
+struct ice_aqc_lldp_stop_start_specific_agent {
+ u8 command;
+#define ICE_AQC_START_STOP_AGENT_M BIT(0)
+#define ICE_AQC_START_STOP_AGENT_STOP_DCBX 0
+#define ICE_AQC_START_STOP_AGENT_START_DCBX ICE_AQC_START_STOP_AGENT_M
+ u8 reserved[15];
+};
+
/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
struct ice_aqc_get_set_rss_key {
#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15)
__le32 addr_low;
};
-/* Add TX LAN Queues (indirect 0x0C30) */
+/* Add Tx LAN Queues (indirect 0x0C30) */
struct ice_aqc_add_txqs {
u8 num_qgrps;
u8 reserved[3];
__le32 addr_low;
};
-/* This is the descriptor of each queue entry for the Add TX LAN Queues
+/* This is the descriptor of each queue entry for the Add Tx LAN Queues
* command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
*/
struct ice_aqc_add_txqs_perq {
struct ice_aqc_txsched_elem info;
};
-/* The format of the command buffer for Add TX LAN Queues (0x0C30)
+/* The format of the command buffer for Add Tx LAN Queues (0x0C30)
* is an array of the following structs. Please note that the length of
* each struct ice_aqc_add_tx_qgrp is variable due
* to the variable number of queues in each group!
struct ice_aqc_add_txqs_perq txqs[1];
};
-/* Disable TX LAN Queues (indirect 0x0C31) */
+/* Disable Tx LAN Queues (indirect 0x0C31) */
struct ice_aqc_dis_txqs {
u8 cmd_type;
#define ICE_AQC_Q_DIS_CMD_S 0
__le32 addr_low;
};
-/* The buffer for Disable TX LAN Queues (indirect 0x0C31)
+/* The buffer for Disable Tx LAN Queues (indirect 0x0C31)
* contains the following structures, arrayed one after the
* other.
* Note: Since the q_id is 16 bits wide, if the
struct ice_aqc_get_topo get_topo;
struct ice_aqc_sched_elem_cmd sched_elem_cmd;
struct ice_aqc_query_txsched_res query_sched_res;
+ struct ice_aqc_query_port_ets port_ets;
struct ice_aqc_nvm nvm;
struct ice_aqc_pf_vf_msg virt;
+ struct ice_aqc_lldp_get_mib lldp_get_mib;
+ struct ice_aqc_lldp_set_mib_change lldp_set_event;
+ struct ice_aqc_lldp_stop lldp_stop;
+ struct ice_aqc_lldp_start lldp_start;
+ struct ice_aqc_lldp_set_local_mib lldp_set_mib;
+ struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
struct ice_aqc_get_set_rss_lut get_set_rss_lut;
struct ice_aqc_get_set_rss_key get_set_rss_key;
struct ice_aqc_add_txqs add_txqs;
/* error codes */
enum ice_aq_err {
ICE_AQ_RC_OK = 0, /* Success */
+ ICE_AQ_RC_EPERM = 1, /* Operation not permitted */
+ ICE_AQ_RC_ENOENT = 2, /* No such element */
ICE_AQ_RC_ENOMEM = 9, /* Out of memory */
ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */
ICE_AQ_RC_EEXIST = 13, /* Object already exists */
ice_aqc_opc_get_sched_elems = 0x0404,
ice_aqc_opc_suspend_sched_elems = 0x0409,
ice_aqc_opc_resume_sched_elems = 0x040A,
+ ice_aqc_opc_query_port_ets = 0x040E,
ice_aqc_opc_delete_sched_elems = 0x040F,
ice_aqc_opc_query_sched_res = 0x0412,
/* PF/VF mailbox commands */
ice_mbx_opc_send_msg_to_pf = 0x0801,
ice_mbx_opc_send_msg_to_vf = 0x0802,
+ /* LLDP commands */
+ ice_aqc_opc_lldp_get_mib = 0x0A00,
+ ice_aqc_opc_lldp_set_mib_change = 0x0A01,
+ ice_aqc_opc_lldp_stop = 0x0A05,
+ ice_aqc_opc_lldp_start = 0x0A06,
+ ice_aqc_opc_get_cee_dcb_cfg = 0x0A07,
+ ice_aqc_opc_lldp_set_local_mib = 0x0A08,
+ ice_aqc_opc_lldp_stop_start_specific_agent = 0x0A09,
/* RSS commands */
ice_aqc_opc_set_rss_key = 0x0B02,
ice_aqc_opc_get_rss_key = 0x0B04,
ice_aqc_opc_get_rss_lut = 0x0B05,
- /* TX queue handling commands/events */
+ /* Tx queue handling commands/events */
ice_aqc_opc_add_txqs = 0x0C30,
ice_aqc_opc_dis_txqs = 0x0C31,
* @hw: pointer to the HW structure
*
* This function sets the MAC type of the adapter based on the
- * vendor ID and device ID stored in the hw structure.
+ * vendor ID and device ID stored in the HW structure.
*/
static enum ice_status ice_set_mac_type(struct ice_hw *hw)
{
/**
* ice_aq_manage_mac_read - manage MAC address read command
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @buf: a virtual buffer to hold the manage MAC read response
* @buf_size: Size of the virtual buffer
* @cd: pointer to command details structure or NULL
/**
* ice_init_fltr_mgmt_struct - initializes filter management list and locks
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*/
static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
{
/**
* ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*/
static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
{
/**
* ice_cfg_fw_log - configure FW logging
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @enable: enable certain FW logging events if true, disable all if false
*
* This function enables/disables the FW logging via Rx CQ events and a UART
/**
* ice_output_fw_log
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @desc: pointer to the AQ message descriptor
* @buf: pointer to the buffer accompanying the AQ message
*
/**
* ice_get_itr_intrl_gran - determine int/intrl granularity
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Determines the itr/intrl granularities based on the maximum aggregate
* bandwidth according to the device's configuration during power-on.
goto err_unroll_cqinit;
}
- /* set the back pointer to hw */
+ /* set the back pointer to HW */
hw->port_info->hw = hw;
/* Initialize port_info struct with switch configuration data */
* @ice_rxq_ctx: pointer to the rxq context
* @rxq_index: the index of the Rx queue
*
- * Copies rxq context from dense structure to hw register space
+ * Copies rxq context from dense structure to HW register space
*/
static enum ice_status
ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
if (rxq_index > QRX_CTRL_MAX_INDEX)
return ICE_ERR_PARAM;
- /* Copy each dword separately to hw */
+ /* Copy each dword separately to HW */
for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
wr32(hw, QRX_CONTEXT(i, rxq_index),
*((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
* @rxq_index: the index of the Rx queue
*
* Converts rxq context from sparse to dense structure and then writes
- * it to hw register space
+ * it to HW register space
*/
enum ice_status
ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
/**
* ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @desc: descriptor describing the command
* @buf: buffer to use for indirect commands (NULL for direct commands)
* @buf_size: size of buffer for indirect commands (0 for direct commands)
/**
* ice_aq_get_fw_ver
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @cd: pointer to command details structure or NULL
*
* Get the firmware version (0x0001) from the admin queue commands
/**
* ice_aq_q_shutdown
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @unloading: is the driver unloading itself
*
* Tell the Firmware that we're shutting down the AdminQ and whether
/**
* ice_aq_req_res
- * @hw: pointer to the hw struct
- * @res: resource id
+ * @hw: pointer to the HW struct
+ * @res: resource ID
* @access: access type
* @sdp_number: resource number
* @timeout: the maximum time in ms that the driver may hold the resource
/**
* ice_aq_release_res
- * @hw: pointer to the hw struct
- * @res: resource id
+ * @hw: pointer to the HW struct
+ * @res: resource ID
* @sdp_number: resource number
* @cd: pointer to command details structure or NULL
*
/**
* ice_acquire_res
* @hw: pointer to the HW structure
- * @res: resource id
+ * @res: resource ID
* @access: access type (read or write)
* @timeout: timeout in milliseconds
*
/**
* ice_release_res
* @hw: pointer to the HW structure
- * @res: resource id
+ * @res: resource ID
*
* This function will release a resource using the proper Admin Command.
*/
status = ice_aq_release_res(hw, res, 0, NULL);
/* there are some rare cases when trying to release the resource
- * results in an admin Q timeout, so handle them correctly
+ * results in an admin queue timeout, so handle them correctly
*/
while ((status == ICE_ERR_AQ_TIMEOUT) &&
(total_delay < hw->adminq.sq_cmd_timeout)) {
/**
* ice_get_num_per_func - determine number of resources per PF
- * @hw: pointer to the hw structure
+ * @hw: pointer to the HW structure
* @max: value to be evenly split between each PF
*
* Determine the number of valid functions by going through the bitmap returned
/**
* ice_parse_caps - parse function/device capabilities
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @buf: pointer to a buffer containing function/device capability records
* @cap_count: number of capability records in the list
* @opc: type of capabilities list to parse
/**
* ice_aq_discover_caps - query function/device capabilities
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @buf: a virtual buffer to hold the capabilities
* @buf_size: Size of the virtual buffer
* @cap_count: cap count needed if AQ err==ENOMEM
/**
* ice_aq_manage_mac_write - manage MAC address write command
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
* @flags: flags to control write behavior
* @cd: pointer to command details structure or NULL
/**
* ice_aq_clear_pxe_mode
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Tell the firmware that the driver is taking over from PXE (0x0110).
*/
/**
* ice_clear_pxe_mode - clear pxe operations mode
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Make sure all PXE mode settings are cleared, including things
* like descriptor fetch/write-back mode.
* @phy_type_low: lower part of phy_type
* @phy_type_high: higher part of phy_type
*
- * This helper function will convert an entry in phy type structure
+ * This helper function will convert an entry in PHY type structure
* [phy_type_low, phy_type_high] to its corresponding link speed.
* Note: In the structure of [phy_type_low, phy_type_high], there should
- * be one bit set, as this function will convert one phy type to its
+ * be one bit set, as this function will convert one PHY type to its
* speed.
* If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned
* If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned
/**
* ice_aq_set_phy_cfg
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @lport: logical port number
* @cfg: structure with PHY configuration data to be set
* @cd: pointer to command details structure or NULL
if (!pcaps)
return ICE_ERR_NO_MEMORY;
- /* Get the current phy config */
+ /* Get the current PHY config */
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
NULL);
if (status) {
/**
* __ice_aq_get_set_rss_key
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_id: VSI FW index
* @key: pointer to key info struct
* @set: set true to set the key, false to get the key
/**
* ice_aq_get_rss_key
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: software VSI handle
* @key: pointer to key info struct
*
/**
* ice_aq_set_rss_key
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: software VSI handle
* @keys: pointer to key info struct
*
* @num_qgrps: number of groups in the list
* @qg_list: the list of groups to disable
* @buf_size: the total size of the qg_list buffer in bytes
- * @rst_src: if called due to reset, specifies the RST source
+ * @rst_src: if called due to reset, specifies the reset source
* @vmvf_num: the relative VM or VF number that is undergoing the reset
* @cd: pointer to command details structure or NULL
*
break;
case ICE_VF_RESET:
cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VF_RESET;
- /* In this case, FW expects vmvf_num to be absolute VF id */
+ /* In this case, FW expects vmvf_num to be absolute VF ID */
cmd->vmvf_and_timeout |=
cpu_to_le16((vmvf_num + hw->func_caps.vf_base_id) &
ICE_AQC_Q_DIS_VMVF_NUM_M);
* ice_ena_vsi_txq
* @pi: port information structure
* @vsi_handle: software VSI handle
- * @tc: tc number
+ * @tc: TC number
* @num_qgrps: Number of added queue groups
* @buf: list of queue groups to be added
* @buf_size: size of buffer for indirect command
* @cd: pointer to command details structure or NULL
*
- * This function adds one lan q
+ * This function adds one LAN queue
*/
enum ice_status
ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_qgrps,
* Bit 5-6.
* - Bit 7 is reserved.
* Without setting the generic section as valid in valid_sections, the
- * Admin Q command will fail with error code ICE_AQ_RC_EINVAL.
+ * Admin queue command will fail with error code ICE_AQ_RC_EINVAL.
*/
buf->txqs[0].info.valid_sections = ICE_AQC_ELEM_VALID_GENERIC;
- /* add the lan q */
+ /* add the LAN queue */
status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
if (status) {
ice_debug(hw, ICE_DBG_SCHED, "enable Q %d failed %d\n",
node.node_teid = buf->txqs[0].q_teid;
node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
- /* add a leaf node into schduler tree q layer */
+ /* add a leaf node into scheduler tree queue layer */
status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
ena_txq_exit:
* @num_queues: number of queues
* @q_ids: pointer to the q_id array
* @q_teids: pointer to queue node teids
- * @rst_src: if called due to reset, specifies the RST source
+ * @rst_src: if called due to reset, specifies the reset source
* @vmvf_num: the relative VM or VF number that is undergoing the reset
* @cd: pointer to command details structure or NULL
*
}
/**
- * ice_cfg_vsi_qs - configure the new/exisiting VSI queues
+ * ice_cfg_vsi_qs - configure the new/existing VSI queues
* @pi: port information structure
* @vsi_handle: software VSI handle
* @tc_bitmap: TC bitmap
* @maxqs: max queues array per TC
- * @owner: lan or rdma
+ * @owner: LAN or RDMA
*
* This function adds/updates the VSI queues per TC.
*/
}
/**
- * ice_cfg_vsi_lan - configure VSI lan queues
+ * ice_cfg_vsi_lan - configure VSI LAN queues
* @pi: port information structure
* @vsi_handle: software VSI handle
* @tc_bitmap: TC bitmap
- * @max_lanqs: max lan queues array per TC
+ * @max_lanqs: max LAN queues array per TC
*
- * This function adds/updates the VSI lan queues per TC.
+ * This function adds/updates the VSI LAN queues per TC.
*/
enum ice_status
ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
/**
* ice_replay_pre_init - replay pre initialization
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Initializes required config data for VSI, FD, ACL, and RSS before replay.
*/
/**
* ice_replay_vsi - replay VSI configuration
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: driver VSI handle
*
* Restore all VSI configuration after reset. It is required to call this
/**
* ice_replay_post - post replay configuration cleanup
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Post replay cleanup.
*/
/* to manage the potential roll-over */
*cur_stat = (new_data + BIT_ULL(32)) - *prev_stat;
}
+
+/**
+ * ice_sched_query_elem - query element information from HW
+ * @hw: pointer to the HW struct
+ * @node_teid: node TEID to be queried
+ * @buf: buffer to element information; it is zeroed here before the query,
+ * so any previous contents are lost
+ *
+ * This function queries HW element information for a single node: @buf is
+ * cleared, @node_teid is stored in buf->generic[0].node_teid and one element
+ * is requested through ice_aq_query_sched_elems().
+ *
+ * Returns the status of ice_aq_query_sched_elems(). Note that a reply which
+ * does not report exactly one element is only logged via ice_debug(); if the
+ * AQ call itself succeeded, that success status is still returned.
+ */
+enum ice_status
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+ struct ice_aqc_get_elem *buf)
+{
+ u16 buf_size, num_elem_ret = 0;
+ enum ice_status status;
+
+ buf_size = sizeof(*buf);
+ memset(buf, 0, buf_size);
+ buf->generic[0].node_teid = cpu_to_le32(node_teid);
+ status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
+ NULL);
+ if (status || num_elem_ret != 1)
+ ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
+ return status;
+}
void
ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
u64 *prev_stat, u64 *cur_stat);
+enum ice_status
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+ struct ice_aqc_get_elem *buf);
#endif /* _ICE_COMMON_H_ */
/**
* ice_check_sq_alive
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @cq: pointer to the specific Control queue
*
* Returns true if Queue is enabled else false.
* @hw: pointer to the hardware structure
* @cq: pointer to the specific Control queue
*
- * Configure base address and length registers for the receive (event q)
+ * Configure base address and length registers for the receive (event queue)
*/
static enum ice_status
ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
/**
* ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @cq: pointer to the specific Control queue
*
* Returns true if the firmware has processed all descriptors on the
/**
* ice_sq_send_cmd - send command to Control Queue (ATQ)
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @cq: pointer to the specific Control queue
* @desc: prefilled descriptor describing the command (non DMA mem)
* @buf: buffer to use for indirect commands (or NULL for direct commands)
/**
* ice_clean_rq_elem
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @cq: pointer to the specific Control queue
* @e: event info from the receive descriptor, includes any buffers
* @pending: number of events that could be left to process
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_dcb.h"
+
+/**
+ * ice_aq_get_lldp_mib - request the LLDP MIB (0x0A00)
+ * @hw: pointer to the HW struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: caller-supplied buffer that receives the MIB block
+ * @buf_size: size of @buf in bytes
+ * @local_len: if not NULL, receives the length of the Local LLDP MIB
+ * @remote_len: if not NULL, receives the length of the Remote LLDP MIB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns ICE_ERR_PARAM when @buf is NULL or @buf_size is zero, otherwise
+ * the status of the admin queue command. The length outputs are written
+ * only when the command succeeds.
+ */
+static enum ice_status
+ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf,
+		    u16 buf_size, u16 *local_len, u16 *remote_len,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_get_mib *get_mib;
+	struct ice_aq_desc desc;
+	enum ice_status err;
+
+	if (!buf || buf_size == 0)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_get_mib);
+	get_mib = &desc.params.lldp_get_mib;
+
+	/* MIB type and bridge type share the same "type" field */
+	get_mib->type = mib_type & ICE_AQ_LLDP_MIB_TYPE_M;
+	get_mib->type |= (bridge_type << ICE_AQ_LLDP_BRID_TYPE_S) &
+			 ICE_AQ_LLDP_BRID_TYPE_M;
+	desc.datalen = cpu_to_le16(buf_size);
+
+	err = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (err)
+		return err;
+
+	/* Lengths are read back from the descriptor after the command */
+	if (local_len)
+		*local_len = le16_to_cpu(get_mib->local_len);
+	if (remote_len)
+		*remote_len = le16_to_cpu(get_mib->remote_len);
+
+	return err;
+}
+
+/**
+ * ice_aq_cfg_lldp_mib_change - configure LLDP MIB change events (0x0A01)
+ * @hw: pointer to the HW struct
+ * @ena_update: true to enable, false to disable event posting
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable or disable posting of an event on ARQ when the LLDP MIB associated
+ * with the interface changes. Returns the admin queue command status.
+ */
+enum ice_status
+ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_mib_change *set_event;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_mib_change);
+
+	/* Only the disable request sets a bit in the command field */
+	set_event = &desc.params.lldp_set_event;
+	if (!ena_update)
+		set_event->command |= ICE_AQ_LLDP_MIB_UPDATE_DIS;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_stop_lldp - stop or shut down the embedded LLDP Agent (0x0A05)
+ * @hw: pointer to the HW struct
+ * @shutdown_lldp_agent: true if the LLDP Agent needs to be shut down,
+ *			 false if it only needs to be stopped
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the admin queue command status.
+ */
+enum ice_status
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
+		 struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop *stop;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_stop);
+
+	/* The shutdown flag is the only difference between the two requests */
+	stop = &desc.params.lldp_stop;
+	if (shutdown_lldp_agent)
+		stop->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_start_lldp - start the embedded LLDP Agent on all ports (0x0A06)
+ * @hw: pointer to the HW struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the admin queue command status.
+ */
+enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_start *start;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_start);
+
+	start = &desc.params.lldp_start;
+	start->command = ICE_AQ_LLDP_AGENT_START;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_lldp_mib - Set the LLDP MIB (0x0A08)
+ * @hw: pointer to the HW struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: caller-supplied buffer handed to the command as the MIB block
+ * @buf_size: size of @buf in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns ICE_ERR_PARAM when @buf is NULL or @buf_size is zero, otherwise
+ * the admin queue command status.
+ */
+static enum ice_status
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_local_mib *set_mib;
+	struct ice_aq_desc desc;
+
+	if (!buf || buf_size == 0)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_local_mib);
+
+	/* Command-specific fields */
+	set_mib = &desc.params.lldp_set_mib;
+	set_mib->type = mib_type;
+	set_mib->length = cpu_to_le16(buf_size);
+
+	/* Generic descriptor fields for a command that carries a buffer */
+	desc.datalen = cpu_to_le16(buf_size);
+	desc.flags |= cpu_to_le16((u16)ICE_AQ_FLAG_RD);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_get_dcbx_status - read the DCBX status field
+ * @hw: pointer to the HW struct
+ *
+ * Reads the PRTDCB_GENS register and returns its DCBX status field.
+ */
+u8 ice_get_dcbx_status(struct ice_hw *hw)
+{
+	u32 gens = rd32(hw, PRTDCB_GENS);
+	u32 field;
+
+	field = (gens & PRTDCB_GENS_DCBX_STATUS_M) >> PRTDCB_GENS_DCBX_STATUS_S;
+
+	return (u8)field;
+}
+
+/**
+ * ice_parse_ieee_ets_common_tlv - parse the tables shared by ETS CFG/REC
+ * @buf: TLV data starting at the Priority Assignment Table
+ * @ets_cfg: container that receives the parsed tables
+ *
+ * Fills prio_table, tcbwtable and tsatable of @ets_cfg from the common part
+ * of an IEEE 802.1Qaz ETS CFG/REC TLV.
+ */
+static void
+ice_parse_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 *bw_tbl, *tsa_tbl;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 * Each octet carries two entries: the even one is taken with the
+	 * PRIO_1 mask/shift, the odd one with the PRIO_0 mask/shift.
+	 */
+	for (i = 0; i < 4; i++) {
+		u8 octet = buf[i];
+
+		ets_cfg->prio_table[i * 2] =
+			((octet & ICE_IEEE_ETS_PRIO_1_M) >>
+			 ICE_IEEE_ETS_PRIO_1_S);
+		ets_cfg->prio_table[i * 2 + 1] =
+			((octet & ICE_IEEE_ETS_PRIO_0_M) >>
+			 ICE_IEEE_ETS_PRIO_0_S);
+	}
+
+	/* The TC Bandwidth Table follows the 4 priority octets, and the TSA
+	 * Assignment Table follows ICE_MAX_TRAFFIC_CLASS octets after that;
+	 * both are read one octet per traffic class.
+	 */
+	bw_tbl = &buf[4];
+	tsa_tbl = &bw_tbl[ICE_MAX_TRAFFIC_CLASS];
+	ice_for_each_traffic_class(i) {
+		ets_cfg->tcbwtable[i] = bw_tbl[i];
+		ets_cfg->tsatable[i] = tsa_tbl[i];
+	}
+}
+
+/**
+ * ice_parse_ieee_etscfg_tlv - parse an IEEE 802.1Qaz ETS CFG TLV
+ * @tlv: IEEE 802.1Qaz ETS CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Extracts willing, cbs and maxtcs from the first info octet and hands the
+ * remaining octets to ice_parse_ieee_ets_common_tlv().
+ */
+static void
+ice_parse_ieee_etscfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *ets = &dcbcfg->etscfg;
+	u8 *info = tlv->tlvinfo;
+	u8 first = info[0];
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	ets->willing = ((first & ICE_IEEE_ETS_WILLING_M) >>
+			ICE_IEEE_ETS_WILLING_S);
+	ets->cbs = ((first & ICE_IEEE_ETS_CBS_M) >> ICE_IEEE_ETS_CBS_S);
+	ets->maxtcs = ((first & ICE_IEEE_ETS_MAXTC_M) >>
+		       ICE_IEEE_ETS_MAXTC_S);
+
+	/* The Priority Assignment Table begins at offset 1 */
+	ice_parse_ieee_ets_common_tlv(&info[1], ets);
+}
+
+/**
+ * ice_parse_ieee_etsrec_tlv - parse an IEEE 802.1Qaz ETS REC TLV
+ * @tlv: IEEE 802.1Qaz ETS REC TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Skips the first info octet and parses the common ETS tables into
+ * @dcbcfg->etsrec.
+ */
+static void
+ice_parse_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *info = tlv->tlvinfo;
+	struct ice_dcb_ets_cfg *rec = &dcbcfg->etsrec;
+
+	/* The Priority Assignment Table begins at offset 1 */
+	ice_parse_ieee_ets_common_tlv(info + 1, rec);
+}
+
+/**
+ * ice_parse_ieee_pfccfg_tlv - parse an IEEE 802.1Qaz PFC CFG TLV
+ * @tlv: IEEE 802.1Qaz PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Fills willing, mbc, pfccap and pfcena of @dcbcfg->pfc from the first two
+ * info octets of @tlv.
+ */
+static void
+ice_parse_ieee_pfccfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *info = tlv->tlvinfo;
+	u8 flags = info[0];
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	dcbcfg->pfc.willing = ((flags & ICE_IEEE_PFC_WILLING_M) >>
+			       ICE_IEEE_PFC_WILLING_S);
+	dcbcfg->pfc.mbc = ((flags & ICE_IEEE_PFC_MBC_M) >> ICE_IEEE_PFC_MBC_S);
+	dcbcfg->pfc.pfccap = ((flags & ICE_IEEE_PFC_CAP_M) >>
+			      ICE_IEEE_PFC_CAP_S);
+
+	/* The second octet is the PFC enable value, stored as is */
+	dcbcfg->pfc.pfcena = info[1];
+}
+
+/**
+ * ice_parse_ieee_app_tlv - parse an IEEE 802.1Qaz APP PRIO TLV
+ * @tlv: IEEE 802.1Qaz APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Fills @dcbcfg->app[] with the 3-octet entries found in @tlv and stores
+ * the number of parsed entries in @dcbcfg->numapps. At most
+ * ICE_DCBX_MAX_APPS entries are parsed. A TLV whose length cannot even hold
+ * the ouisubtype and the reserved octet yields zero entries.
+ */
+static void
+ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv,
+		       struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 offset = 0;
+	u16 typelen;
+	int i = 0;
+	u16 len;
+	u8 *buf;
+
+	typelen = ntohs(tlv->typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	buf = tlv->tlvinfo;
+
+	/* Reject a length that is too short for ouisubtype plus the reserved
+	 * byte: the u16 subtraction below would wrap and the loop would then
+	 * read entries far beyond the TLV.
+	 */
+	if (len < sizeof(tlv->ouisubtype) + 1) {
+		dcbcfg->numapps = 0;
+		return;
+	}
+
+	/* Removing sizeof(ouisubtype) and reserved byte from len.
+	 * Remaining len div 3 is number of APP TLVs.
+	 */
+	len -= (sizeof(tlv->ouisubtype) + 1);
+
+	/* Move offset to App Priority Table */
+	offset++;
+
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 *
+	 * An entry occupies buf[offset] .. buf[offset + 2]; only parse it
+	 * when all three octets lie inside the TLV, so a trailing partial
+	 * entry is ignored instead of being read past the end.
+	 */
+	while (offset + 2 <= len) {
+		dcbcfg->app[i].priority = ((buf[offset] &
+					    ICE_IEEE_APP_PRIO_M) >>
+					   ICE_IEEE_APP_PRIO_S);
+		dcbcfg->app[i].selector = ((buf[offset] &
+					    ICE_IEEE_APP_SEL_M) >>
+					   ICE_IEEE_APP_SEL_S);
+		dcbcfg->app[i].prot_id = (buf[offset + 1] << 0x8) |
+			buf[offset + 2];
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+
+	dcbcfg->numapps = i;
+}
+
+/**
+ * ice_parse_ieee_tlv - dispatch an IEEE 802.1Qaz TLV by subtype
+ * @tlv: IEEE 802.1Qaz TLV
+ * @dcbcfg: Local store to update with the parsed data
+ *
+ * Extracts the subtype from the ouisubtype field and calls the matching
+ * parser. Unknown subtypes are ignored.
+ */
+static void
+ice_parse_ieee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 oui_sub = ntohl(tlv->ouisubtype);
+	u8 kind;
+
+	kind = (u8)((oui_sub & ICE_LLDP_TLV_SUBTYPE_M) >>
+		    ICE_LLDP_TLV_SUBTYPE_S);
+
+	if (kind == ICE_IEEE_SUBTYPE_ETS_CFG)
+		ice_parse_ieee_etscfg_tlv(tlv, dcbcfg);
+	else if (kind == ICE_IEEE_SUBTYPE_ETS_REC)
+		ice_parse_ieee_etsrec_tlv(tlv, dcbcfg);
+	else if (kind == ICE_IEEE_SUBTYPE_PFC_CFG)
+		ice_parse_ieee_pfccfg_tlv(tlv, dcbcfg);
+	else if (kind == ICE_IEEE_SUBTYPE_APP_PRI)
+		ice_parse_ieee_app_tlv(tlv, dcbcfg);
+}
+
+/**
+ * ice_parse_cee_pgcfg_tlv - parse a CEE DCBX PG CFG TLV
+ * @tlv: CEE DCBX PG CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Fills willing (only when the willing flag is set), cbs, prio_table,
+ * tcbwtable and maxtcs of @dcbcfg->etscfg from @tlv.
+ */
+static void
+ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *ets = &dcbcfg->etscfg;
+	u8 *cur = tlv->tlvinfo;
+	int i;
+
+	/* willing is only ever set here, never cleared */
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		ets->willing = 1;
+
+	ets->cbs = 0;
+
+	/* Priority Group Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++, cur++) {
+		ets->prio_table[i * 2] =
+			((*cur & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+		ets->prio_table[i * 2 + 1] =
+			((*cur & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+	}
+
+	/* PG Percentage Table: one octet per traffic class */
+	ice_for_each_traffic_class(i)
+		ets->tcbwtable[i] = *cur++;
+
+	/* Number of TCs supported (1 octet) */
+	ets->maxtcs = *cur;
+}
+
+/**
+ * ice_parse_cee_pfccfg_tlv - parse a CEE DCBX PFC CFG TLV
+ * @tlv: CEE DCBX PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Sets pfc.willing when the willing flag is present and copies the first
+ * two info octets into pfc.pfcena and pfc.pfccap.
+ */
+static void
+ice_parse_cee_pfccfg_tlv(struct ice_cee_feat_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *info = tlv->tlvinfo;
+
+	/* ------------------------
+	 * | PFC Enable | PFC TCs |
+	 * ------------------------
+	 * | 1 octet    | 1 octet |
+	 */
+	dcbcfg->pfc.pfcena = info[0];
+	dcbcfg->pfc.pfccap = info[1];
+
+	/* willing is only ever set here, never cleared */
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		dcbcfg->pfc.willing = 1;
+}
+
+/**
+ * ice_parse_cee_app_tlv - parse a CEE DCBX APP PRIO TLV
+ * @tlv: CEE DCBX APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Derives the entry count from the TLV length, caps it at
+ * ICE_DCBX_MAX_APPS and fills @dcbcfg->app[] from the entries in @tlv.
+ */
+static void
+ice_parse_cee_app_tlv(struct ice_cee_feat_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_cee_app_prio *app;
+	u16 hdr, tlv_len;
+	u8 idx;
+
+	hdr = ntohs(tlv->hdr.typelen);
+	tlv_len = ((hdr & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+
+	dcbcfg->numapps = tlv_len / sizeof(*app);
+	if (!dcbcfg->numapps)
+		return;
+	if (dcbcfg->numapps > ICE_DCBX_MAX_APPS)
+		dcbcfg->numapps = ICE_DCBX_MAX_APPS;
+
+	/* Entries are laid out back to back starting at tlvinfo */
+	app = (struct ice_cee_app_prio *)tlv->tlvinfo;
+	for (idx = 0; idx < dcbcfg->numapps; idx++, app++) {
+		u8 prio = 0;
+		u8 sel;
+
+		/* Priority is the lowest bit set in prio_map; it ends up as
+		 * ICE_MAX_USER_PRIORITY when no bit is set.
+		 */
+		while (prio < ICE_MAX_USER_PRIORITY &&
+		       !(app->prio_map & BIT(prio)))
+			prio++;
+
+		dcbcfg->app[idx].priority = prio;
+
+		/* Get Selector from lower 2 bits, and convert to IEEE */
+		sel = (app->upper_oui_sel & ICE_CEE_APP_SELECTOR_M);
+		if (sel == ICE_CEE_APP_SEL_ETHTYPE)
+			dcbcfg->app[idx].selector = ICE_APP_SEL_ETHTYPE;
+		else if (sel == ICE_CEE_APP_SEL_TCPIP)
+			dcbcfg->app[idx].selector = ICE_APP_SEL_TCPIP;
+		else
+			/* Keep selector as it is for unknown types */
+			dcbcfg->app[idx].selector = sel;
+
+		dcbcfg->app[idx].prot_id = ntohs(app->protocol);
+	}
+}
+
+/**
+ * ice_parse_cee_tlv
+ * @tlv: CEE DCBX TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value. Returns without parsing when the subtype is
+ * not ICE_CEE_DCBX_TYPE or when the TLV length leaves no room for feature
+ * sub-TLVs after the control TLV. Parsing stops at the first sub-TLV with
+ * an unknown type or after ICE_CEE_MAX_FEAT_TYPE sub-TLVs.
+ */
+static void
+ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_cee_feat_tlv *sub_tlv;
+ u8 subtype, feat_tlv_count = 0;
+ u16 len, tlvlen, typelen;
+ u32 ouisubtype;
+
+ ouisubtype = ntohl(tlv->ouisubtype);
+ subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+ ICE_LLDP_TLV_SUBTYPE_S);
+ /* Return if not CEE DCBX */
+ if (subtype != ICE_CEE_DCBX_TYPE)
+ return;
+
+ typelen = ntohs(tlv->typelen);
+ tlvlen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ /* len is the byte offset from the start of @tlv to the first feature
+ * sub-TLV: TLV header + ouisubtype + control TLV.
+ * NOTE(review): len includes sizeof(tlv->typelen) while tlvlen is the
+ * value of the length field; confirm both are meant to be compared
+ * on the same basis.
+ */
+ len = sizeof(tlv->typelen) + sizeof(ouisubtype) +
+ sizeof(struct ice_cee_ctrl_tlv);
+ /* Return if no CEE DCBX Feature TLVs */
+ if (tlvlen <= len)
+ return;
+
+ sub_tlv = (struct ice_cee_feat_tlv *)((char *)tlv + len);
+ /* NOTE(review): the walk is bounded by feat_tlv_count only; sublen is
+ * not checked against tlvlen before advancing to the next sub-TLV.
+ */
+ while (feat_tlv_count < ICE_CEE_MAX_FEAT_TYPE) {
+ u16 sublen;
+
+ /* subtype is reused here for the sub-TLV type field */
+ typelen = ntohs(sub_tlv->hdr.typelen);
+ sublen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ subtype = (u8)((typelen & ICE_LLDP_TLV_TYPE_M) >>
+ ICE_LLDP_TLV_TYPE_S);
+ switch (subtype) {
+ case ICE_CEE_SUBTYPE_PG_CFG:
+ ice_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg);
+ break;
+ case ICE_CEE_SUBTYPE_PFC_CFG:
+ ice_parse_cee_pfccfg_tlv(sub_tlv, dcbcfg);
+ break;
+ case ICE_CEE_SUBTYPE_APP_PRI:
+ ice_parse_cee_app_tlv(sub_tlv, dcbcfg);
+ break;
+ default:
+ return; /* Invalid Sub-type return */
+ }
+ feat_tlv_count++;
+ /* Move to next sub TLV */
+ sub_tlv = (struct ice_cee_feat_tlv *)
+ ((char *)sub_tlv + sizeof(sub_tlv->hdr.typelen) +
+ sublen);
+ }
+}
+
+/**
+ * ice_parse_org_tlv - dispatch an organization specific TLV by OUI
+ * @tlv: Organization specific TLV
+ * @dcbcfg: Local store to update with the parsed data
+ *
+ * IEEE 802.1Qaz and CEE DCBX OUIs are passed on to their parsers; TLVs
+ * with any other OUI are ignored.
+ */
+static void
+ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 oui_sub = ntohl(tlv->ouisubtype);
+	u32 org;
+
+	org = ((oui_sub & ICE_LLDP_TLV_OUI_M) >> ICE_LLDP_TLV_OUI_S);
+
+	if (org == ICE_IEEE_8021QAZ_OUI)
+		ice_parse_ieee_tlv(tlv, dcbcfg);
+	else if (org == ICE_CEE_DCBX_OUI)
+		ice_parse_cee_tlv(tlv, dcbcfg);
+}
+
+/**
+ * ice_lldp_to_dcb_cfg
+ * @lldpmib: LLDPDU to be parsed
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Parse DCB configuration from the LLDPDU. Walks the TLVs that follow the
+ * Ethernet header and passes every organization specific TLV to
+ * ice_parse_org_tlv(); all other TLV types are skipped.
+ *
+ * Returns ICE_ERR_PARAM if either argument is NULL, 0 otherwise.
+ */
+enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_lldp_org_tlv *tlv;
+ enum ice_status ret = 0;
+ u16 offset = 0;
+ u16 typelen;
+ u16 type;
+ u16 len;
+
+ if (!lldpmib || !dcbcfg)
+ return ICE_ERR_PARAM;
+
+ /* set to the start of LLDPDU */
+ lldpmib += ETH_HLEN;
+ tlv = (struct ice_lldp_org_tlv *)lldpmib;
+ while (1) {
+ /* offset accumulates header + payload size of every TLV seen so
+ * far, including the current one.
+ */
+ typelen = ntohs(tlv->typelen);
+ type = ((typelen & ICE_LLDP_TLV_TYPE_M) >> ICE_LLDP_TLV_TYPE_S);
+ len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ offset += sizeof(typelen) + len;
+
+ /* END TLV or beyond LLDPDU size */
+ /* NOTE(review): offset starts at 0 after skipping ETH_HLEN bytes
+ * but is compared against the full ICE_LLDPDU_SIZE, and typelen is
+ * read before the bound is checked - confirm the buffer sizes
+ * callers pass make this safe.
+ */
+ if (type == ICE_TLV_TYPE_END || offset > ICE_LLDPDU_SIZE)
+ break;
+
+ switch (type) {
+ case ICE_TLV_TYPE_ORG:
+ ice_parse_org_tlv(tlv, dcbcfg);
+ break;
+ default:
+ break;
+ }
+
+ /* Move to next TLV */
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+ }
+
+ /* ret is never changed after initialization, so this is always 0 */
+ return ret;
+}
+
+/**
+ * ice_aq_get_dcb_cfg - query and parse the DCB configuration
+ * @hw: pointer to the HW struct
+ * @mib_type: MIB type for the query
+ * @bridgetype: bridge type for the query (remote)
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Fetches the LLDP MIB into a temporary ICE_LLDPDU_SIZE buffer and, on
+ * success, parses it into @dcbcfg. Returns ICE_ERR_NO_MEMORY when the
+ * buffer cannot be allocated, otherwise the status of the query or of the
+ * parse step.
+ */
+static enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg)
+{
+	enum ice_status err;
+	u8 *mib;
+
+	/* Scratch buffer for the LLDPDU; released before returning */
+	mib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!mib)
+		return ICE_ERR_NO_MEMORY;
+
+	err = ice_aq_get_lldp_mib(hw, bridgetype, mib_type, (void *)mib,
+				  ICE_LLDPDU_SIZE, NULL, NULL, NULL);
+	if (err)
+		goto free_mib;
+
+	/* Parse LLDP MIB to get DCB configuration */
+	err = ice_lldp_to_dcb_cfg(mib, dcbcfg);
+
+free_mib:
+	devm_kfree(ice_hw_to_dev(hw), mib);
+
+	return err;
+}
+
+/**
+ * ice_aq_start_stop_dcbx - Start/Stop DCBx service in FW (0x0A09)
+ * @hw: pointer to the HW struct
+ * @start_dcbx_agent: true if the DCBx Agent needs to be started,
+ *		      false if it needs to be stopped
+ * @dcbx_agent_status: set to true when the command succeeded and the
+ *		       descriptor's command field equals the start value
+ *		       afterwards, false otherwise
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start/Stop the embedded DCBx Agent. When this wrapper returns success the
+ * caller still needs to compare @dcbx_agent_status with the requested state
+ * and react accordingly.
+ */
+enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop_start_specific_agent *agent;
+	struct ice_aq_desc desc;
+	enum ice_status err;
+
+	ice_fill_dflt_direct_cmd_desc(&desc,
+				      ice_aqc_opc_lldp_stop_start_specific_agent);
+
+	/* Only a start request sets a value in the command field */
+	agent = &desc.params.lldp_agent_ctrl;
+	if (start_dcbx_agent)
+		agent->command = ICE_AQC_START_STOP_AGENT_START_DCBX;
+
+	err = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	/* The command field is read again after the command was sent */
+	*dcbx_agent_status =
+		!err && agent->command == ICE_AQC_START_STOP_AGENT_START_DCBX;
+
+	return err;
+}
+
+/**
+ * ice_aq_get_cee_dcb_cfg - get the CEE DCBX operational config (0x0A07)
+ * @hw: pointer to the HW struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the admin queue command status.
+ */
+static enum ice_status
+ice_aq_get_cee_dcb_cfg(struct ice_hw *hw,
+		       struct ice_aqc_get_cee_dcb_cfg_resp *buff,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	void *resp = buff;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cee_dcb_cfg);
+
+	/* The whole response structure is offered as the command buffer */
+	return ice_aq_send_cmd(hw, &desc, resp, sizeof(*buff), cd);
+}
+
+/**
+ * ice_cee_to_dcb_cfg
+ * @cee_cfg: pointer to CEE configuration struct
+ * @dcbcfg: DCB configuration struct
+ *
+ * Convert CEE configuration from firmware to DCB configuration. Fills the
+ * ETS config (maxtcs, prio_table, tcbwtable, tsatable), the PFC enable and
+ * capability fields, and up to three APP entries (FCoE, iSCSI, FIP) of
+ * @dcbcfg, then stores the number of APP entries in numapps.
+ */
+static void
+ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
+ u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift;
+ u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+ u8 i, err, sync, oper, app_index, ice_app_sel_type;
+ u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift;
+ u16 ice_app_prot_id_type;
+
+ /* CEE PG data to ETS config */
+ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+ /* Note that the FW creates the oper_prio_tc nibbles reversed
+ * from those in the CEE Priority Group sub-TLV.
+ */
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+ dcbcfg->etscfg.prio_table[i * 2] =
+ ((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_0_M) >>
+ ICE_CEE_PGID_PRIO_0_S);
+ dcbcfg->etscfg.prio_table[i * 2 + 1] =
+ ((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_1_M) >>
+ ICE_CEE_PGID_PRIO_1_S);
+ }
+
+ /* Copy the bandwidth table and derive the TSA table: an entry whose
+ * prio_table value is ICE_CEE_PGID_STRICT is remapped to the last
+ * operational TC and marked strict, every other entry is marked ETS.
+ * NOTE(review): oper_num_tc - 1 is not guarded against oper_num_tc
+ * being 0.
+ */
+ ice_for_each_traffic_class(i) {
+ dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+ if (dcbcfg->etscfg.prio_table[i] == ICE_CEE_PGID_STRICT) {
+ /* Map it to next empty TC */
+ dcbcfg->etscfg.prio_table[i] = cee_cfg->oper_num_tc - 1;
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+ } else {
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+ }
+ }
+
+ /* CEE PFC data to ETS config */
+ dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en;
+ dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+
+ /* Walk the three application types; each iteration selects the
+ * status/priority masks and the selector/protocol ID for one of them.
+ */
+ app_index = 0;
+ for (i = 0; i < 3; i++) {
+ if (i == 0) {
+ /* FCoE APP */
+ ice_aqc_cee_status_mask = ICE_AQC_CEE_FCOE_STATUS_M;
+ ice_aqc_cee_status_shift = ICE_AQC_CEE_FCOE_STATUS_S;
+ ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M;
+ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S;
+ ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+ ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE;
+ } else if (i == 1) {
+ /* iSCSI APP */
+ ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M;
+ ice_aqc_cee_status_shift = ICE_AQC_CEE_ISCSI_STATUS_S;
+ ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M;
+ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
+ ice_app_sel_type = ICE_APP_SEL_TCPIP;
+ ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI;
+ } else {
+ /* FIP APP */
+ ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M;
+ ice_aqc_cee_status_shift = ICE_AQC_CEE_FIP_STATUS_S;
+ ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M;
+ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S;
+ ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+ ice_app_prot_id_type = ICE_APP_PROT_ID_FIP;
+ }
+
+ status = (tlv_status & ice_aqc_cee_status_mask) >>
+ ice_aqc_cee_status_shift;
+ err = (status & ICE_TLV_STATUS_ERR) ? 1 : 0;
+ sync = (status & ICE_TLV_STATUS_SYNC) ? 1 : 0;
+ oper = (status & ICE_TLV_STATUS_OPER) ? 1 : 0;
+ /* Add FCoE/iSCSI/FIP APP if Error is False and
+ * Oper/Sync is True
+ */
+ if (!err && sync && oper) {
+ dcbcfg->app[app_index].priority =
+ (app_prio & ice_aqc_cee_app_mask) >>
+ ice_aqc_cee_app_shift;
+ dcbcfg->app[app_index].selector = ice_app_sel_type;
+ dcbcfg->app[app_index].prot_id = ice_app_prot_id_type;
+ app_index++;
+ }
+ }
+
+ /* Only the entries that passed the status check are counted */
+ dcbcfg->numapps = app_index;
+}
+
+/**
+ * ice_get_ieee_or_cee_dcb_cfg
+ * @pi: port information structure
+ * @dcbx_mode: mode of DCBX (IEEE or CEE)
+ *
+ * Get IEEE or CEE mode DCB configuration from the Firmware. The local MIB
+ * is parsed into @pi->local_dcbx_cfg in IEEE mode or into
+ * @pi->desired_dcbx_cfg in CEE mode; the remote MIB is always parsed into
+ * @pi->remote_dcbx_cfg.
+ *
+ * Returns ICE_ERR_PARAM if @pi is NULL or @dcbx_mode is neither
+ * ICE_DCBX_MODE_IEEE nor ICE_DCBX_MODE_CEE, otherwise the status of the
+ * queries. An ENOENT admin queue status on the remote query is not treated
+ * as an error.
+ */
+static enum ice_status
+ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
+{
+	struct ice_dcbx_cfg *dcbx_cfg;
+	enum ice_status ret;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	/* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
+	 * or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE.
+	 * Any other mode has no destination, so reject it before issuing
+	 * an admin queue command whose result could not be stored.
+	 */
+	if (dcbx_mode == ICE_DCBX_MODE_IEEE)
+		dcbx_cfg = &pi->local_dcbx_cfg;
+	else if (dcbx_mode == ICE_DCBX_MODE_CEE)
+		dcbx_cfg = &pi->desired_dcbx_cfg;
+	else
+		return ICE_ERR_PARAM;
+
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_LOCAL,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	if (ret)
+		return ret;
+
+	/* Get Remote DCB Config */
+	dcbx_cfg = &pi->remote_dcbx_cfg;
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	/* Don't treat ENOENT as an error for Remote MIBs */
+	if (ret && pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+		ret = 0;
+
+	return ret;
+}
+
+/**
+ * ice_get_dcb_cfg - get the DCB configuration from the Firmware
+ * @pi: port information structure
+ *
+ * Tries the CEE operational configuration first. If that query succeeds the
+ * local config is filled in CEE mode; if it fails with an ENOENT admin
+ * queue status the IEEE data is queried instead. Any other failure is
+ * returned as is. Returns ICE_ERR_PARAM when @pi is NULL.
+ */
+enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_cee_dcb_cfg_resp cee_resp;
+	struct ice_dcbx_cfg *local;
+	enum ice_status err;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	local = &pi->local_dcbx_cfg;
+
+	err = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_resp, NULL);
+	if (!err) {
+		/* CEE mode */
+		local->dcbx_mode = ICE_DCBX_MODE_CEE;
+		local->tlv_status = le32_to_cpu(cee_resp.tlv_status);
+		ice_cee_to_dcb_cfg(&cee_resp, local);
+		return ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
+	}
+
+	if (pi->hw->adminq.sq_last_status != ICE_AQ_RC_ENOENT)
+		return err;
+
+	/* CEE mode not enabled try querying IEEE data */
+	local->dcbx_mode = ICE_DCBX_MODE_IEEE;
+	return ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
+}
+
+/**
+ * ice_init_dcb - update DCB configuration from the Firmware
+ * @hw: pointer to the HW struct
+ *
+ * Reads the DCBX status, fetches the current configuration when DCBX is
+ * done or in progress, and enables LLDP MIB change events. The port's
+ * is_sw_lldp flag is updated along the way.
+ *
+ * Returns ICE_ERR_NOT_SUPPORTED when the DCB capability is absent,
+ * ICE_ERR_NOT_READY when the DCBX status is disabled, otherwise the status
+ * of the firmware requests.
+ */
+enum ice_status ice_init_dcb(struct ice_hw *hw)
+{
+	struct ice_port_info *pi = hw->port_info;
+	enum ice_status err;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	pi->is_sw_lldp = true;
+
+	/* Get DCBX status */
+	pi->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (pi->dcbx_status == ICE_DCBX_STATUS_DONE ||
+	    pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS) {
+		/* Get current DCBX configuration */
+		err = ice_get_dcb_cfg(pi);
+		/* The flag tracks whether the last AQ status was EPERM */
+		pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM);
+		if (err)
+			return err;
+	} else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
+		return ICE_ERR_NOT_READY;
+	}
+
+	/* Configure the LLDP MIB change event */
+	err = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+	if (err)
+		return err;
+
+	pi->is_sw_lldp = false;
+
+	return err;
+}
+
+/**
+ * ice_add_ieee_ets_common_tlv - write the tables shared by ETS CFG/REC
+ * @buf: Data buffer to be populated with ice_dcb_ets_cfg data
+ * @ets_cfg: Container for ice_dcb_ets_cfg data
+ *
+ * Writes prio_table, tcbwtable and tsatable of @ets_cfg to @buf, starting
+ * with the Priority Assignment Table at @buf[0].
+ */
+static void
+ice_add_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 *bw_tbl, *tsa_tbl;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 * Two 4-bit entries are packed per octet, even entry shifted up.
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		u8 even = ets_cfg->prio_table[i * 2] & 0xF;
+		u8 odd = ets_cfg->prio_table[i * 2 + 1] & 0xF;
+
+		buf[i] = (even << ICE_IEEE_ETS_PRIO_1_S) | odd;
+	}
+
+	/* The TC Bandwidth Table follows the priority octets and the TSA
+	 * Assignment Table follows ICE_MAX_TRAFFIC_CLASS octets after it;
+	 * both take one octet per traffic class.
+	 */
+	bw_tbl = &buf[ICE_MAX_TRAFFIC_CLASS / 2];
+	tsa_tbl = &bw_tbl[ICE_MAX_TRAFFIC_CLASS];
+	ice_for_each_traffic_class(i) {
+		bw_tbl[i] = ets_cfg->tcbwtable[i];
+		tsa_tbl[i] = ets_cfg->tsatable[i];
+	}
+}
+
+/**
+ * ice_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format
+ * @tlv: TLV to fill with the ETS config data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS CFG TLV: writes the TLV header, the ouisubtype,
+ * the willing/max-TC octet and the common ETS tables from @dcbcfg->etscfg.
+ */
+static void
+ice_add_ieee_ets_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *ets = &dcbcfg->etscfg;
+	u8 *info = tlv->tlvinfo;
+	u8 first_octet;
+	u32 oui_sub;
+	u16 hdr;
+
+	hdr = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+	       ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(hdr);
+
+	oui_sub = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		   ICE_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(oui_sub);
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 * Only the willing bit and the max TC field are written here.
+	 */
+	first_octet = ets->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+	if (ets->willing)
+		first_octet |= BIT(ICE_IEEE_ETS_WILLING_S);
+	info[0] = first_octet;
+
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&info[1], ets);
+}
+
+/**
+ * ice_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format
+ * @tlv: TLV to fill with the ETS Recommended data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS REC TLV: writes the TLV header, the ouisubtype
+ * and the common ETS tables from @dcbcfg->etsrec. The first info octet is
+ * not written.
+ */
+static void
+ice_add_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *rec = &dcbcfg->etsrec;
+	u8 *info = tlv->tlvinfo;
+	u32 oui_sub;
+	u16 hdr;
+
+	hdr = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+	       ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(hdr);
+
+	oui_sub = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		   ICE_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(oui_sub);
+
+	/* First Octet is reserved */
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(info + 1, rec);
+}
+
+/**
+ * ice_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ *
+ * Prepare IEEE 802.1Qaz PFC CFG TLV: writes the TLV header, the ouisubtype
+ * and the two PFC octets from @dcbcfg->pfc. Both info octets are fully
+ * overwritten, so the result does not depend on prior buffer contents.
+ */
+static void
+ice_add_ieee_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u8 first_octet = 0;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 *
+	 * Build the first octet in a local and store it once: OR-ing into
+	 * buf[0] directly would keep stale bits whenever willing is not set
+	 * and the caller did not pre-zero the buffer.
+	 */
+	if (dcbcfg->pfc.willing)
+		first_octet |= BIT(ICE_IEEE_PFC_WILLING_S);
+
+	if (dcbcfg->pfc.mbc)
+		first_octet |= BIT(ICE_IEEE_PFC_MBC_S);
+
+	first_octet |= dcbcfg->pfc.pfccap & 0xF;
+
+	buf[0] = first_octet;
+	buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_ieee_app_pri_tlv - Prepare APP TLV in IEEE format
+ * @tlv: TLV to fill with the APP data in IEEE format
+ * @dcbcfg: Local store which holds the APP CFG data
+ *
+ * Prepare IEEE 802.1Qaz APP CFG TLV. Nothing is written when
+ * @dcbcfg->numapps is zero; otherwise at most ICE_DCBX_MAX_APPS entries are
+ * encoded and the TLV length reflects the number actually written.
+ */
+static void
+ice_add_ieee_app_pri_tlv(struct ice_lldp_org_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *info = tlv->tlvinfo;
+	u16 hdr, payload_len;
+	u16 pos = 1;	/* offset 0 is skipped; the table starts at 1 */
+	u32 oui_sub;
+	u8 n = 0;
+
+	/* No APP TLVs then just return */
+	if (dcbcfg->numapps == 0)
+		return;
+
+	oui_sub = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		   ICE_IEEE_SUBTYPE_APP_PRI);
+	tlv->ouisubtype = htonl(oui_sub);
+
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (n < dcbcfg->numapps) {
+		u8 prio = dcbcfg->app[n].priority & 0x7;
+		u8 sel = dcbcfg->app[n].selector & 0x7;
+
+		info[pos] = (prio << ICE_IEEE_APP_PRIO_S) | sel;
+		/* Protocol ID is written high octet first */
+		info[pos + 1] = (dcbcfg->app[n].prot_id >> 0x8) & 0xFF;
+		info[pos + 2] = dcbcfg->app[n].prot_id & 0xFF;
+
+		/* Move to next app */
+		pos += 3;
+		if (++n >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+
+	/* len includes size of ouisubtype + 1 reserved + 3*numapps */
+	payload_len = sizeof(tlv->ouisubtype) + 1 + (n * 3);
+	hdr = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+	       (payload_len & 0x1FF));
+	tlv->typelen = htons(hdr);
+}
+
+/**
+ * ice_add_dcb_tlv - Add all IEEE TLVs
+ * @tlv: Fill TLV data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ * @tlvid: Type of IEEE TLV
+ *
+ * Dispatch to the builder matching @tlvid. Unknown IDs leave @tlv
+ * untouched.
+ */
+static void
+ice_add_dcb_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg,
+		u16 tlvid)
+{
+	if (tlvid == ICE_IEEE_TLV_ID_ETS_CFG)
+		ice_add_ieee_ets_tlv(tlv, dcbcfg);
+	else if (tlvid == ICE_IEEE_TLV_ID_ETS_REC)
+		ice_add_ieee_etsrec_tlv(tlv, dcbcfg);
+	else if (tlvid == ICE_IEEE_TLV_ID_PFC_CFG)
+		ice_add_ieee_pfc_tlv(tlv, dcbcfg);
+	else if (tlvid == ICE_IEEE_TLV_ID_APP_PRI)
+		ice_add_ieee_app_pri_tlv(tlv, dcbcfg);
+}
+
+/**
+ * ice_dcb_cfg_to_lldp - Convert DCB configuration to MIB format
+ * @lldpmib: buffer that receives the encoded TLVs
+ * @miblen: output; total number of bytes of TLV data written
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Convert the DCB configuration to MIB format by emitting one TLV per
+ * ID from ICE_TLV_ID_START up to (not including)
+ * ICE_TLV_ID_END_OF_LLDPPDU, packed back to back in @lldpmib.
+ */
+static void
+ice_dcb_cfg_to_lldp(u8 *lldpmib, u16 *miblen, struct ice_dcbx_cfg *dcbcfg)
+{
+ u16 len, offset = 0, tlvid = ICE_TLV_ID_START;
+ struct ice_lldp_org_tlv *tlv;
+ u16 typelen;
+
+ tlv = (struct ice_lldp_org_tlv *)lldpmib;
+ while (1) {
+ ice_add_dcb_tlv(tlv, dcbcfg, tlvid++);
+ /* Read back the length the builder stored in the TLV header */
+ typelen = ntohs(tlv->typelen);
+ len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+ /* A zero length means nothing was emitted for this ID; the
+ * extra 2 bytes account for the typelen header itself
+ */
+ if (len)
+ offset += len + 2;
+ /* END TLV or beyond LLDPDU size */
+ /* NOTE(review): the size check runs after the TLV has already
+ * been written, so it stops the walk but does not prevent the
+ * write itself - confirm the buffer is always large enough
+ */
+ if (tlvid >= ICE_TLV_ID_END_OF_LLDPPDU ||
+ offset > ICE_LLDPDU_SIZE)
+ break;
+ /* Move to next TLV */
+ if (len)
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+ }
+ *miblen = offset;
+}
+
+/**
+ * ice_set_dcb_cfg - Set the local LLDP MIB to FW
+ * @pi: port information structure
+ *
+ * Encode @pi->local_dcbx_cfg into a temporary LLDPDU buffer and hand it
+ * to firmware with ice_aq_set_lldp_mib().
+ *
+ * Return: ICE_ERR_PARAM if @pi is NULL, ICE_ERR_NO_MEMORY if the buffer
+ * cannot be allocated, otherwise the status of the admin queue command.
+ */
+enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi)
+{
+	struct ice_dcbx_cfg *dcbcfg;
+	enum ice_status ret;
+	struct ice_hw *hw;
+	u8 *lldpmib;
+	u8 mib_type;
+	u16 miblen;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+	/* update the HW local config */
+	dcbcfg = &pi->local_dcbx_cfg;
+
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return ICE_ERR_NO_MEMORY;
+
+	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+		mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB |
+			   SET_LOCAL_MIB_TYPE_CEE_NON_WILLING;
+	else
+		mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+
+	ice_dcb_cfg_to_lldp(lldpmib, &miblen, dcbcfg);
+	ret = ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen,
+				  NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_query_port_ets - query port ets configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * Issue the query-port-ETS admin queue command for the port's root node.
+ *
+ * Return: ICE_ERR_PARAM if @pi is NULL, otherwise the status returned
+ * by ice_aq_send_cmd().
+ */
+static enum ice_status
+ice_aq_query_port_ets(struct ice_port_info *pi,
+		      struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_query_port_ets *cmd;
+	struct ice_aq_desc desc;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	cmd = &desc.params.port_ets;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
+	cmd->port_teid = pi->root->info.node_teid;
+
+	return ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_update_port_tc_tree_cfg - update TC tree configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer holding the per-TC node TEIDs to reconcile with
+ *
+ * update the SW DB with the new TC changes: children of the root whose
+ * TEID is absent from @buf are marked not in use, and TEIDs in @buf that
+ * have no matching child are queried and added as new nodes.
+ *
+ * Return: ICE_ERR_PARAM if @pi is NULL, the first failing status from
+ * ice_sched_query_elem()/ice_sched_add_node(), otherwise 0.
+ */
+static enum ice_status
+ice_update_port_tc_tree_cfg(struct ice_port_info *pi,
+ struct ice_aqc_port_ets_elem *buf)
+{
+ struct ice_sched_node *node, *tc_node;
+ struct ice_aqc_get_elem elem;
+ enum ice_status status = 0;
+ u32 teid1, teid2;
+ u8 i, j;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+ /* suspend the missing TC nodes */
+ /* NOTE(review): children[i] is dereferenced here without the NULL
+ * check that the second loop below applies - confirm entries below
+ * num_children can never be NULL
+ */
+ for (i = 0; i < pi->root->num_children; i++) {
+ teid1 = le32_to_cpu(pi->root->children[i]->info.node_teid);
+ ice_for_each_traffic_class(j) {
+ teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+ if (teid1 == teid2)
+ break;
+ }
+ /* Loop ended early: this child is still listed in buf */
+ if (j < ICE_MAX_TRAFFIC_CLASS)
+ continue;
+ /* TC is missing */
+ pi->root->children[i]->in_use = false;
+ }
+ /* add the new TC nodes */
+ ice_for_each_traffic_class(j) {
+ teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+ if (teid2 == ICE_INVAL_TEID)
+ continue;
+ /* Is it already present in the tree ? */
+ for (i = 0; i < pi->root->num_children; i++) {
+ tc_node = pi->root->children[i];
+ if (!tc_node)
+ continue;
+ teid1 = le32_to_cpu(tc_node->info.node_teid);
+ if (teid1 == teid2) {
+ tc_node->tc_num = j;
+ tc_node->in_use = true;
+ break;
+ }
+ }
+ /* Loop ended early: an existing child was matched and updated */
+ if (i < pi->root->num_children)
+ continue;
+ /* new TC */
+ status = ice_sched_query_elem(pi->hw, teid2, &elem);
+ if (!status)
+ status = ice_sched_add_node(pi, 1, &elem.generic[0]);
+ if (status)
+ break;
+ /* update the TC number */
+ node = ice_sched_find_node_by_teid(pi->root, teid2);
+ if (node)
+ node->tc_num = j;
+ }
+ return status;
+}
+
+/**
+ * ice_query_port_ets - query port ets configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ets configuration and update the
+ * SW DB with the TC changes. Both steps run with @pi->sched_lock held.
+ *
+ * Return: ICE_ERR_PARAM if @pi is NULL, otherwise the status of the
+ * query or, if the query succeeded, of the TC tree update.
+ */
+enum ice_status
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	enum ice_status status;
+
+	/* The helpers below reject a NULL port, but the lock is taken
+	 * through @pi first, so the check has to happen here.
+	 */
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_aq_query_port_ets(pi, buf, buf_size, cd);
+	if (!status)
+		status = ice_update_port_tc_tree_cfg(pi, buf);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_H_
+#define _ICE_DCB_H_
+
+#include "ice_type.h"
+
+#define ICE_DCBX_STATUS_NOT_STARTED 0
+#define ICE_DCBX_STATUS_IN_PROGRESS 1
+#define ICE_DCBX_STATUS_DONE 2
+#define ICE_DCBX_STATUS_DIS 7
+
+#define ICE_TLV_TYPE_END 0
+#define ICE_TLV_TYPE_ORG 127
+
+#define ICE_IEEE_8021QAZ_OUI 0x0080C2
+#define ICE_IEEE_SUBTYPE_ETS_CFG 9
+#define ICE_IEEE_SUBTYPE_ETS_REC 10
+#define ICE_IEEE_SUBTYPE_PFC_CFG 11
+#define ICE_IEEE_SUBTYPE_APP_PRI 12
+
+#define ICE_CEE_DCBX_OUI 0x001B21
+#define ICE_CEE_DCBX_TYPE 2
+#define ICE_CEE_SUBTYPE_PG_CFG 2
+#define ICE_CEE_SUBTYPE_PFC_CFG 3
+#define ICE_CEE_SUBTYPE_APP_PRI 4
+#define ICE_CEE_MAX_FEAT_TYPE 3
+/* Defines for LLDP TLV header */
+#define ICE_LLDP_TLV_LEN_S 0
+#define ICE_LLDP_TLV_LEN_M (0x01FF << ICE_LLDP_TLV_LEN_S)
+#define ICE_LLDP_TLV_TYPE_S 9
+#define ICE_LLDP_TLV_TYPE_M (0x7F << ICE_LLDP_TLV_TYPE_S)
+#define ICE_LLDP_TLV_SUBTYPE_S 0
+#define ICE_LLDP_TLV_SUBTYPE_M (0xFF << ICE_LLDP_TLV_SUBTYPE_S)
+#define ICE_LLDP_TLV_OUI_S 8
+#define ICE_LLDP_TLV_OUI_M (0xFFFFFFUL << ICE_LLDP_TLV_OUI_S)
+
+/* Defines for IEEE ETS TLV */
+#define ICE_IEEE_ETS_MAXTC_S 0
+#define ICE_IEEE_ETS_MAXTC_M (0x7 << ICE_IEEE_ETS_MAXTC_S)
+#define ICE_IEEE_ETS_CBS_S 6
+#define ICE_IEEE_ETS_CBS_M BIT(ICE_IEEE_ETS_CBS_S)
+#define ICE_IEEE_ETS_WILLING_S 7
+#define ICE_IEEE_ETS_WILLING_M BIT(ICE_IEEE_ETS_WILLING_S)
+#define ICE_IEEE_ETS_PRIO_0_S 0
+#define ICE_IEEE_ETS_PRIO_0_M (0x7 << ICE_IEEE_ETS_PRIO_0_S)
+#define ICE_IEEE_ETS_PRIO_1_S 4
+#define ICE_IEEE_ETS_PRIO_1_M (0x7 << ICE_IEEE_ETS_PRIO_1_S)
+#define ICE_CEE_PGID_PRIO_0_S 0
+#define ICE_CEE_PGID_PRIO_0_M (0xF << ICE_CEE_PGID_PRIO_0_S)
+#define ICE_CEE_PGID_PRIO_1_S 4
+#define ICE_CEE_PGID_PRIO_1_M (0xF << ICE_CEE_PGID_PRIO_1_S)
+#define ICE_CEE_PGID_STRICT 15
+
+/* Defines for IEEE TSA types */
+#define ICE_IEEE_TSA_STRICT 0
+#define ICE_IEEE_TSA_ETS 2
+
+/* Defines for IEEE PFC TLV */
+#define ICE_IEEE_PFC_CAP_S 0
+#define ICE_IEEE_PFC_CAP_M (0xF << ICE_IEEE_PFC_CAP_S)
+#define ICE_IEEE_PFC_MBC_S 6
+#define ICE_IEEE_PFC_MBC_M BIT(ICE_IEEE_PFC_MBC_S)
+#define ICE_IEEE_PFC_WILLING_S 7
+#define ICE_IEEE_PFC_WILLING_M BIT(ICE_IEEE_PFC_WILLING_S)
+
+/* Defines for IEEE APP TLV */
+#define ICE_IEEE_APP_SEL_S 0
+#define ICE_IEEE_APP_SEL_M (0x7 << ICE_IEEE_APP_SEL_S)
+#define ICE_IEEE_APP_PRIO_S 5
+#define ICE_IEEE_APP_PRIO_M (0x7 << ICE_IEEE_APP_PRIO_S)
+
+/* TLV definitions for preparing MIB */
+#define ICE_IEEE_TLV_ID_ETS_CFG 3
+#define ICE_IEEE_TLV_ID_ETS_REC 4
+#define ICE_IEEE_TLV_ID_PFC_CFG 5
+#define ICE_IEEE_TLV_ID_APP_PRI 6
+#define ICE_TLV_ID_END_OF_LLDPPDU 7
+#define ICE_TLV_ID_START ICE_IEEE_TLV_ID_ETS_CFG
+
+#define ICE_IEEE_ETS_TLV_LEN 25
+#define ICE_IEEE_PFC_TLV_LEN 6
+#define ICE_IEEE_APP_TLV_LEN 11
+
+/* IEEE 802.1AB LLDP Organization specific TLV */
+struct ice_lldp_org_tlv {
+ /* TLV type in bits 15:9, length in bits 8:0 (see ICE_LLDP_TLV_TYPE_*
+ * and ICE_LLDP_TLV_LEN_*), big endian
+ */
+ __be16 typelen;
+ /* OUI in bits 31:8, subtype in bits 7:0 (see ICE_LLDP_TLV_OUI_* and
+ * ICE_LLDP_TLV_SUBTYPE_*), big endian
+ */
+ __be32 ouisubtype;
+ /* First octet of the TLV payload; builders index past element 0 */
+ u8 tlvinfo[1];
+} __packed;
+
+/* Header shared by the CEE TLVs declared below */
+struct ice_cee_tlv_hdr {
+ __be16 typelen;
+ u8 operver; /* presumably operating version - TODO confirm */
+ u8 maxver; /* presumably max supported version - TODO confirm */
+};
+
+/* CEE control TLV: common header plus sequence/ack numbers */
+struct ice_cee_ctrl_tlv {
+ struct ice_cee_tlv_hdr hdr;
+ __be32 seqno;
+ __be32 ackno;
+};
+
+/* CEE feature TLV: common header, state flags, subtype and payload */
+struct ice_cee_feat_tlv {
+ struct ice_cee_tlv_hdr hdr;
+ u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */
+#define ICE_CEE_FEAT_TLV_ENA_M 0x80
+#define ICE_CEE_FEAT_TLV_WILLING_M 0x40
+#define ICE_CEE_FEAT_TLV_ERR_M 0x20
+ u8 subtype;
+ u8 tlvinfo[1];
+};
+
+/* One CEE application priority entry */
+struct ice_cee_app_prio {
+ __be16 protocol;
+ u8 upper_oui_sel; /* Bits: |Upper OUI(6)|Selector(2)| */
+#define ICE_CEE_APP_SELECTOR_M 0x03
+ __be16 lower_oui;
+ u8 prio_map;
+} __packed;
+
+u8 ice_get_dcbx_status(struct ice_hw *hw);
+enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg);
+enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi);
+enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi);
+enum ice_status ice_init_dcb(struct ice_hw *hw);
+enum ice_status
+ice_query_port_ets(struct ice_port_info *pi,
+ struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+ struct ice_sq_cd *cmd_details);
+#ifdef CONFIG_DCB
+/* LLDP/DCBX agent control commands; only declared when DCB is built in */
+enum ice_status
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
+ struct ice_sq_cd *cd);
+enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+ bool *dcbx_agent_status, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
+ struct ice_sq_cd *cd);
+#else /* CONFIG_DCB */
+/* Without CONFIG_DCB the agent control calls become inline stubs that
+ * ignore their arguments and return 0
+ */
+static inline enum ice_status
+ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
+ bool __always_unused shutdown_lldp_agent,
+ struct ice_sq_cd __always_unused *cd)
+{
+ return 0;
+}
+
+static inline enum ice_status
+ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+ struct ice_sq_cd __always_unused *cd)
+{
+ return 0;
+}
+
+/* Stub additionally reports the agent as not running through
+ * @dcbx_agent_status so callers never read an unset value
+ */
+static inline enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw,
+ bool __always_unused start_dcbx_agent,
+ bool *dcbx_agent_status,
+ struct ice_sq_cd __always_unused *cd)
+{
+ *dcbx_agent_status = false;
+
+ return 0;
+}
+
+static inline enum ice_status
+ice_aq_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
+ bool __always_unused ena_update,
+ struct ice_sq_cd __always_unused *cd)
+{
+ return 0;
+}
+
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_H_ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_dcb_lib.h"
+
+/**
+ * ice_dcb_get_ena_tc - return bitmap of enabled TCs
+ * @dcbcfg: DCB config to evaluate for enabled TCs
+ *
+ * Bit 0 is always set; one further bit is set for each TC counted by
+ * ice_dcb_get_num_tc(), starting from TC0.
+ */
+u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 tc_count = ice_dcb_get_num_tc(dcbcfg);
+	u8 tc_bitmap = 1;
+	u8 tc;
+
+	for (tc = 0; tc < tc_count; tc++)
+		tc_bitmap |= BIT(tc);
+
+	return tc_bitmap;
+}
+
+/**
+ * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
+ * @dcbcfg: config to retrieve number of TCs from
+ *
+ * Return: the count of contiguous TCs (starting at TC0) referenced by
+ * the ETS priority table, or 1 when none are referenced or when the
+ * referenced TCs are not contiguous.
+ */
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	bool gap_seen = false;
+	u8 tc_bitmap = 0;
+	u8 count = 0;
+	int i;
+
+	/* Scan the ETS Config Priority Table to find traffic classes
+	 * enabled and create a bitmask of enabled TCs
+	 */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		tc_bitmap |= BIT(dcbcfg->etscfg.prio_table[i]);
+
+	/* Scan bitmask for contiguous TCs starting with TC0 */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (!(tc_bitmap & BIT(i))) {
+			gap_seen = true;
+			continue;
+		}
+
+		/* An enabled TC after an unused one breaks contiguity */
+		if (gap_seen) {
+			pr_err("Non-contiguous TCs - Disabling DCB\n");
+			return 1;
+		}
+
+		count++;
+	}
+
+	/* There is always at least 1 TC */
+	return count ? count : 1;
+}
+
+/**
+ * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: VSI owner of rings being updated
+ *
+ * With DCB disabled every Tx and Rx ring is assigned TC 0. Otherwise
+ * each enabled TC's queue range (qoffset, qcount_tx) is stamped with
+ * that TC number, stopping at the first TC not set in ena_tc.
+ */
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
+{
+	struct ice_ring *txr, *rxr;
+	u16 first_q, num_q;
+	int q, tc;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+		/* Reset the TC information */
+		for (q = 0; q < vsi->num_txq; q++) {
+			txr = vsi->tx_rings[q];
+			txr->dcb_tc = 0;
+		}
+		for (q = 0; q < vsi->num_rxq; q++) {
+			rxr = vsi->rx_rings[q];
+			rxr->dcb_tc = 0;
+		}
+		return;
+	}
+
+	ice_for_each_traffic_class(tc) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
+			break;
+
+		first_q = vsi->tc_cfg.tc_info[tc].qoffset;
+		num_q = vsi->tc_cfg.tc_info[tc].qcount_tx;
+		for (q = first_q; q < (first_q + num_q); q++) {
+			txr = vsi->tx_rings[q];
+			rxr = vsi->rx_rings[q];
+			txr->dcb_tc = tc;
+			rxr->dcb_tc = tc;
+		}
+	}
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg: PF VSIs get the enabled-TC bitmap, every other VSI
+ * type gets the default traffic class.
+ */
+static void ice_pf_dcb_recfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+	struct ice_vsi *vsi;
+	u8 tc_map;
+	int v;
+
+	/* Update each VSI */
+	ice_for_each_vsi(pf, v) {
+		vsi = pf->vsi[v];
+		if (!vsi)
+			continue;
+
+		tc_map = ICE_DFLT_TRAFFIC_CLASS;
+		if (vsi->type == ICE_VSI_PF)
+			tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+		if (ice_vsi_cfg_tc(vsi, tc_map)) {
+			dev_err(&pf->pdev->dev,
+				"Failed to config TC for VSI index: %d\n",
+				vsi->idx);
+			continue;
+		}
+
+		ice_vsi_map_rings_to_vectors(vsi);
+	}
+}
+
+/**
+ * ice_pf_dcb_cfg - Apply new DCB configuration
+ * @pf: pointer to the PF struct
+ * @new_cfg: DCBX config to apply
+ *
+ * Updates ICE_FLAG_DCB_ENA from the TC count in @new_cfg, then, if the
+ * config differs from the current one, disables all VSIs under rtnl,
+ * installs the new config (pushing it to firmware in SW LLDP mode),
+ * refreshes the port ETS state, reconfigures the VSIs and re-enables
+ * them.
+ *
+ * Return: 0 on success or when no change is needed, -ENOMEM if the
+ * backup of the current config cannot be allocated, otherwise the
+ * status from ice_set_dcb_cfg() or ice_query_port_ets().
+ */
+static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
+{
+	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	int ret = 0;
+
+	curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(new_cfg) > 1) {
+		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
+		dev_dbg(&pf->pdev->dev, "No change in DCB config required\n");
+		return ret;
+	}
+
+	/* Store old config in case FW config fails. Bail out before
+	 * touching any VSI if the backup cannot be allocated; the copy
+	 * below would otherwise dereference NULL.
+	 */
+	old_cfg = devm_kmemdup(&pf->pdev->dev, curr_cfg, sizeof(*old_cfg),
+			       GFP_KERNEL);
+	if (!old_cfg)
+		return -ENOMEM;
+
+	/* avoid race conditions by holding the lock while disabling and
+	 * re-enabling the VSI
+	 */
+	rtnl_lock();
+	ice_pf_dis_all_vsi(pf, true);
+
+	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
+	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+
+	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
+	 * the new config came from the HW in the first place.
+	 */
+	if (pf->hw.port_info->is_sw_lldp) {
+		ret = ice_set_dcb_cfg(pf->hw.port_info);
+		if (ret) {
+			dev_err(&pf->pdev->dev, "Set DCB Config failed\n");
+			/* Restore previous settings to local config */
+			memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
+			goto out;
+		}
+	}
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		goto out;
+	}
+
+	ice_pf_dcb_recfg(pf);
+
+out:
+	ice_pf_ena_all_vsi(pf, true);
+	rtnl_unlock();
+	devm_kfree(&pf->pdev->dev, old_cfg);
+	return ret;
+}
+
+/**
+ * ice_dcb_rebuild - rebuild DCB post reset
+ * @pf: physical function instance
+ *
+ * Re-queries the port ETS state and, if DCB was enabled, replays the
+ * local config to firmware with ETS forced unwilling, verifies that
+ * firmware reports the same config back, and then restores the saved
+ * willing state. Any failure falls back to a single-TC default config
+ * applied through ice_pf_dcb_cfg().
+ */
+void ice_dcb_rebuild(struct ice_pf *pf)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *prev_cfg;
+	enum ice_status ret;
+	u8 willing;
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	/* If DCB was not enabled previously, we are done */
+	if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return;
+
+	/* Save current willing state and force FW to unwilling */
+	willing = pf->hw.port_info->local_dcbx_cfg.etscfg.willing;
+	pf->hw.port_info->local_dcbx_cfg.etscfg.willing = 0x0;
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n");
+		goto dcb_error;
+	}
+
+	/* Retrieve DCB config and ensure same as current in SW */
+	prev_cfg = devm_kmemdup(&pf->pdev->dev,
+				&pf->hw.port_info->local_dcbx_cfg,
+				sizeof(*prev_cfg), GFP_KERNEL);
+	if (!prev_cfg) {
+		dev_err(&pf->pdev->dev, "Failed to alloc space for DCB cfg\n");
+		goto dcb_error;
+	}
+
+	ice_init_dcb(&pf->hw);
+	if (memcmp(prev_cfg, &pf->hw.port_info->local_dcbx_cfg,
+		   sizeof(*prev_cfg))) {
+		/* difference in cfg detected - disable DCB till next MIB */
+		dev_err(&pf->pdev->dev, "Set local MIB not accurate\n");
+		devm_kfree(&pf->pdev->dev, prev_cfg);
+		goto dcb_error;
+	}
+
+	/* fetched config congruent to previous configuration */
+	devm_kfree(&pf->pdev->dev, prev_cfg);
+
+	/* Configuration replayed - reset willing state to previous */
+	pf->hw.port_info->local_dcbx_cfg.etscfg.willing = willing;
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Fail restoring prev willing state\n");
+		goto dcb_error;
+	}
+	dev_info(&pf->pdev->dev, "DCB restored after reset\n");
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	return;
+
+dcb_error:
+	dev_err(&pf->pdev->dev, "Disabling DCB until new settings occur\n");
+	prev_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*prev_cfg), GFP_KERNEL);
+	/* This path is also reached after an allocation failure above, so
+	 * the fallback allocation must be checked before it is filled in.
+	 */
+	if (!prev_cfg)
+		return;
+
+	/* Fallback: a single ETS TC owning all bandwidth, willing */
+	prev_cfg->etscfg.willing = true;
+	prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+	prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+	memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
+	ice_pf_dcb_cfg(pf, prev_cfg);
+	devm_kfree(&pf->pdev->dev, prev_cfg);
+}
+
+/**
+ * ice_dcb_init_cfg - set the initial DCB config in SW
+ * @pf: pf to apply config to
+ *
+ * Takes a snapshot of the port's local DCBX config, clears the local
+ * copy, and applies the snapshot through ice_pf_dcb_cfg().
+ *
+ * Return: 0 on success, -ENOMEM if the snapshot cannot be allocated,
+ * -EINVAL if ice_pf_dcb_cfg() fails.
+ */
+static int ice_dcb_init_cfg(struct ice_pf *pf)
+{
+	struct ice_port_info *pi = pf->hw.port_info;
+	struct ice_dcbx_cfg *snapshot;
+	int err;
+
+	snapshot = devm_kmemdup(&pf->pdev->dev, &pi->local_dcbx_cfg,
+				sizeof(*snapshot), GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	/* Clear the live copy so the snapshot always registers as a change */
+	memset(&pi->local_dcbx_cfg, 0, sizeof(*snapshot));
+
+	dev_info(&pf->pdev->dev, "Configuring initial DCB values\n");
+	err = ice_pf_dcb_cfg(pf, snapshot) ? -EINVAL : 0;
+
+	devm_kfree(&pf->pdev->dev, snapshot);
+
+	return err;
+}
+
+/**
+ * ice_dcb_sw_dflt_cfg - Apply a default DCB config
+ * @pf: pf to apply config to
+ *
+ * Builds a default config (one ETS TC with all bandwidth, willing ETS
+ * and PFC, non-willing ETS recommendation, one FCoE ethertype APP entry
+ * at priority 3), applies it with ice_pf_dcb_cfg() and then refreshes
+ * the port ETS state.
+ *
+ * Return: 0 on success, -ENOMEM if the config cannot be allocated,
+ * otherwise the status from ice_pf_dcb_cfg() or ice_query_port_ets().
+ */
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *dcbcfg;
+	struct ice_port_info *pi;
+	struct ice_hw *hw;
+	int ret;
+
+	hw = &pf->hw;
+	pi = hw->port_info;
+
+	/* devm_kzalloc() returns zeroed memory, so no extra memset is
+	 * needed; the result must be checked before it is written to.
+	 */
+	dcbcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*dcbcfg), GFP_KERNEL);
+	if (!dcbcfg)
+		return -ENOMEM;
+
+	memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
+
+	dcbcfg->etscfg.willing = 1;
+	dcbcfg->etscfg.maxtcs = 8;
+	dcbcfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+
+	memcpy(&dcbcfg->etsrec, &dcbcfg->etscfg,
+	       sizeof(dcbcfg->etsrec));
+	dcbcfg->etsrec.willing = 0;
+
+	dcbcfg->pfc.willing = 1;
+	dcbcfg->pfc.pfccap = IEEE_8021QAZ_MAX_TCS;
+
+	dcbcfg->numapps = 1;
+	dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
+	dcbcfg->app[0].priority = 3;
+	dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
+
+	ret = ice_pf_dcb_cfg(pf, dcbcfg);
+	devm_kfree(&pf->pdev->dev, dcbcfg);
+	if (ret)
+		return ret;
+
+	return ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
+}
+
+/**
+ * ice_init_pf_dcb - initialize DCB for a PF
+ * @pf: PF to initialize DCB for
+ *
+ * Return: 0 on success, -EOPNOTSUPP if the function capabilities do not
+ * report DCB, -EIO if applying the SW default config fails, otherwise
+ * the error from ice_dcb_init_cfg().
+ */
+int ice_init_pf_dcb(struct ice_pf *pf)
+{
+ struct device *dev = &pf->pdev->dev;
+ struct ice_port_info *port_info;
+ struct ice_hw *hw = &pf->hw;
+ int sw_default = 0;
+ int err;
+
+ port_info = hw->port_info;
+
+ /* check if device is DCB capable */
+ if (!hw->func_caps.common_cap.dcb) {
+ dev_dbg(dev, "DCB not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Best effort to put DCBx and LLDP into a good state */
+ port_info->dcbx_status = ice_get_dcbx_status(hw);
+ if (port_info->dcbx_status != ICE_DCBX_STATUS_DONE &&
+ port_info->dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
+ /* Written by ice_aq_start_stop_dcbx(); not read afterwards */
+ bool dcbx_status;
+
+ /* Attempt to start LLDP engine. Ignore errors
+ * as this will error if it is already started
+ */
+ ice_aq_start_lldp(hw, NULL);
+
+ /* Attempt to start DCBX. Ignore errors as this
+ * will error if it is already started
+ */
+ ice_aq_start_stop_dcbx(hw, true, &dcbx_status, NULL);
+ }
+
+ err = ice_init_dcb(hw);
+ if (err) {
+ /* FW LLDP not in usable state, default to SW DCBx/LLDP */
+ dev_info(&pf->pdev->dev, "FW LLDP not in usable state\n");
+ hw->port_info->dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
+ hw->port_info->is_sw_lldp = true;
+ }
+
+ if (port_info->dcbx_status == ICE_DCBX_STATUS_DIS)
+ dev_info(&pf->pdev->dev, "DCBX disabled\n");
+
+ /* LLDP disabled in FW */
+ if (port_info->is_sw_lldp) {
+ sw_default = 1;
+ dev_info(&pf->pdev->dev, "DCBx/LLDP in SW mode.\n");
+ }
+
+ if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+ sw_default = 1;
+ dev_info(&pf->pdev->dev, "DCBX not started\n");
+ }
+
+ /* SW-managed path: apply the driver default config and mark DCB as
+ * host managed and enabled
+ */
+ if (sw_default) {
+ err = ice_dcb_sw_dflt_cfg(pf);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "Failed to set local DCB config %d\n", err);
+ err = -EIO;
+ goto dcb_init_err;
+ }
+
+ pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+ set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+ set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ return 0;
+ }
+
+ /* DCBX in FW and LLDP enabled in FW */
+ pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE;
+
+ set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+ err = ice_dcb_init_cfg(pf);
+ if (err)
+ goto dcb_init_err;
+
+ dev_info(&pf->pdev->dev, "DCBX offload supported\n");
+ return err;
+
+dcb_init_err:
+ dev_err(dev, "DCB init failed\n");
+ return err;
+}
+
+/**
+ * ice_update_dcb_stats - Update DCB stats counters
+ * @pf: PF whose stats needs to be updated
+ *
+ * Refreshes the per-priority XON/XOFF Rx/Tx counters and the
+ * XON-to-XOFF transition counter for priorities 0-7 via
+ * ice_stat_update32().
+ */
+void ice_update_dcb_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *cur = &pf->stats;
+	struct ice_hw_port_stats *prev = &pf->stats_prev;
+	struct ice_hw *hw = &pf->hw;
+	u8 port = hw->pf_id;
+	int prio;
+
+	for (prio = 0; prio < 8; prio++) {
+		ice_stat_update32(hw, GLPRT_PXOFFRXC(port, prio),
+				  pf->stat_prev_loaded,
+				  &prev->priority_xoff_rx[prio],
+				  &cur->priority_xoff_rx[prio]);
+		ice_stat_update32(hw, GLPRT_PXONRXC(port, prio),
+				  pf->stat_prev_loaded,
+				  &prev->priority_xon_rx[prio],
+				  &cur->priority_xon_rx[prio]);
+		ice_stat_update32(hw, GLPRT_PXONTXC(port, prio),
+				  pf->stat_prev_loaded,
+				  &prev->priority_xon_tx[prio],
+				  &cur->priority_xon_tx[prio]);
+		ice_stat_update32(hw, GLPRT_PXOFFTXC(port, prio),
+				  pf->stat_prev_loaded,
+				  &prev->priority_xoff_tx[prio],
+				  &cur->priority_xoff_tx[prio]);
+		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, prio),
+				  pf->stat_prev_loaded,
+				  &prev->priority_xon_2_xoff[prio],
+				  &cur->priority_xon_2_xoff[prio]);
+	}
+}
+
+/**
+ * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * When DCB is enabled, copies the low three bits of the skb priority
+ * into the VLAN priority field of @first->tx_flags. Frames carrying a
+ * software VLAN header get the TCI rewritten in place; all others are
+ * flagged for hardware VLAN insertion.
+ *
+ * Return: 0 on success or when nothing needs doing, or the negative
+ * value returned by skb_cow_head().
+ */
+int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
+			      struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct vlan_ethhdr *vhdr;
+	int err;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
+		return 0;
+
+	/* Untagged frames at control priority are left untouched */
+	if (!(first->tx_flags &
+	      (ICE_TX_FLAGS_HW_VLAN | ICE_TX_FLAGS_SW_VLAN)) &&
+	    skb->priority == TC_PRIO_CONTROL)
+		return 0;
+
+	/* Insert 802.1p priority into VLAN header */
+	first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
+	/* Mask the lower 3 bits to set the 802.1p priority */
+	first->tx_flags |= (skb->priority & 0x7) << ICE_TX_FLAGS_VLAN_PR_S;
+
+	if (!(first->tx_flags & ICE_TX_FLAGS_SW_VLAN)) {
+		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+		return 0;
+	}
+
+	/* Make the header writable before patching the TCI */
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	vhdr = (struct vlan_ethhdr *)skb->data;
+	vhdr->h_vlan_TCI = htons(first->tx_flags >> ICE_TX_FLAGS_VLAN_S);
+
+	return 0;
+}
+
+/**
+ * ice_dcb_process_lldp_set_mib_change - Process MIB change
+ * @pf: ptr to ice_pf
+ * @event: pointer to the admin queue receive event
+ *
+ * Only acted on when DCBX is LLD managed: the event's MIB is parsed on
+ * top of a copy of the current local config and applied through
+ * ice_pf_dcb_cfg(), then the DCB config is re-read from firmware.
+ */
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event)
+{
+	struct ice_dcbx_cfg *cur_cfg, *new_cfg;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) {
+		dev_dbg(&pf->pdev->dev,
+			"MIB Change Event in HOST mode\n");
+		return;
+	}
+
+	cur_cfg = &pf->hw.port_info->local_dcbx_cfg;
+	new_cfg = devm_kmemdup(&pf->pdev->dev, cur_cfg,
+			       sizeof(*new_cfg), GFP_KERNEL);
+	if (!new_cfg)
+		return;
+
+	if (!ice_lldp_to_dcb_cfg(event->msg_buf, new_cfg))
+		ice_pf_dcb_cfg(pf, new_cfg);
+
+	devm_kfree(&pf->pdev->dev, new_cfg);
+
+	/* Get updated DCBx data from firmware */
+	if (ice_get_dcb_cfg(pf->hw.port_info))
+		dev_err(&pf->pdev->dev,
+			"Failed to get DCB config\n");
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_LIB_H_
+#define _ICE_DCB_LIB_H_
+
+#include "ice.h"
+#include "ice_lib.h"
+
+#ifdef CONFIG_DCB
+#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+
+/* DCB library entry points; real implementations when DCB is built in */
+void ice_dcb_rebuild(struct ice_pf *pf);
+u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
+int ice_init_pf_dcb(struct ice_pf *pf);
+void ice_update_dcb_stats(struct ice_pf *pf);
+int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
+ struct ice_tx_buf *first);
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+ struct ice_rq_event_info *event);
+/* Copy the ring's DCB traffic class into the Tx queue context */
+static inline void
+ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring)
+{
+ tlan_ctx->cgd_num = ring->dcb_tc;
+}
+#else
+/* Without CONFIG_DCB the entry points collapse to no-ops or to stubs
+ * returning single-TC defaults
+ */
+#define ice_dcb_rebuild(pf) do {} while (0)
+
+static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+ return ICE_DFLT_TRAFFIC_CLASS;
+}
+
+static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+ return 1;
+}
+
+/* Stub always reports DCB as unsupported */
+static inline int ice_init_pf_dcb(struct ice_pf *pf)
+{
+ dev_dbg(&pf->pdev->dev, "DCB not supported\n");
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
+ struct ice_tx_buf __always_unused *first)
+{
+ return 0;
+}
+
+#define ice_update_dcb_stats(pf) do {} while (0)
+#define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
+#define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0)
+#define ice_set_cgd_num(tlan_ctx, ring) do {} while (0)
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_LIB_H_ */
/* ethtool support for ice */
#include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
struct ice_stats {
char stat_string[ETH_GSTRING_LEN];
#define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats)
#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
-#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_VSI_STATS_LEN + \
- ice_q_stats_len(n))
+#define ICE_PFC_STATS_LEN ( \
+ (FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_rx) + \
+ FIELD_SIZEOF(struct ice_pf, stats.priority_xon_rx) + \
+ FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_tx) + \
+ FIELD_SIZEOF(struct ice_pf, stats.priority_xon_tx)) \
+ / sizeof(u64))
+#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
+ ICE_VSI_STATS_LEN + ice_q_stats_len(n))
static const struct ice_stats ice_gstrings_vsi_stats[] = {
ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
+ ICE_PRIV_FLAG("disable-fw-lldp", ICE_FLAG_DISABLE_FW_LLDP),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
p += ETH_GSTRING_LEN;
}
+ for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+ snprintf(p, ETH_GSTRING_LEN,
+ "port.tx-priority-%u-xon", i);
+ p += ETH_GSTRING_LEN;
+ snprintf(p, ETH_GSTRING_LEN,
+ "port.tx-priority-%u-xoff", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+ snprintf(p, ETH_GSTRING_LEN,
+ "port.rx-priority-%u-xon", i);
+ p += ETH_GSTRING_LEN;
+ snprintf(p, ETH_GSTRING_LEN,
+ "port.rx-priority-%u-xoff", i);
+ p += ETH_GSTRING_LEN;
+ }
break;
case ETH_SS_PRIV_FLAGS:
for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ DECLARE_BITMAP(change_flags, ICE_PF_FLAGS_NBITS);
+ DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ int ret = 0;
u32 i;
if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
return -EINVAL;
+ set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+
+ bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
const struct ice_priv_flag *priv_flag;
clear_bit(priv_flag->bitno, pf->flags);
}
- return 0;
+ bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
+
+ if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, change_flags)) {
+ if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, pf->flags)) {
+ enum ice_status status;
+
+ status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
+ NULL);
+ /* If unregistering for LLDP events fails, this is
+ * not an error state, as there shouldn't be any
+ * events to respond to.
+ */
+ if (status)
+ dev_info(&pf->pdev->dev,
+ "Failed to unreg for LLDP events\n");
+
+ /* The AQ call to stop the FW LLDP agent will generate
+ * an error if the agent is already stopped.
+ */
+ status = ice_aq_stop_lldp(&pf->hw, true, NULL);
+ if (status)
+ dev_warn(&pf->pdev->dev,
+ "Fail to stop LLDP agent\n");
+ /* Use case for having the FW LLDP agent stopped
+ * will likely not need DCB, so failure to init is
+ * not a concern of ethtool
+ */
+ status = ice_init_pf_dcb(pf);
+ if (status)
+ dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
+ } else {
+ enum ice_status status;
+ bool dcbx_agent_status;
+
+ /* AQ command to start FW LLDP agent will return an
+ * error if the agent is already started
+ */
+ status = ice_aq_start_lldp(&pf->hw, NULL);
+ if (status)
+ dev_warn(&pf->pdev->dev,
+ "Fail to start LLDP Agent\n");
+
+ /* AQ command to start FW DCBx agent will fail if
+ * the agent is already started
+ */
+ status = ice_aq_start_stop_dcbx(&pf->hw, true,
+ &dcbx_agent_status,
+ NULL);
+ if (status)
+ dev_dbg(&pf->pdev->dev,
+ "Failed to start FW DCBX\n");
+
+ dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n",
+ dcbx_agent_status ? "ACTIVE" : "DISABLED");
+
+ /* Failure to configure MIB change or init DCB is not
+ * relevant to ethtool. Print notification that
+ * registration/init failed but do not return error
+ * state to ethtool
+ */
+ status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
+ NULL);
+ if (status)
+ dev_dbg(&pf->pdev->dev,
+ "Fail to reg for MIB change\n");
+
+ status = ice_init_pf_dcb(pf);
+ if (status)
+ dev_dbg(&pf->pdev->dev, "Fail to init DCB\n");
+ }
+ }
+ clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+ return ret;
}
static int ice_get_sset_count(struct net_device *netdev, int sset)
data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
}
+
+ for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+ data[i++] = pf->stats.priority_xon_tx[j];
+ data[i++] = pf->stats.priority_xoff_tx[j];
+ }
+
+ for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+ data[i++] = pf->stats.priority_xon_rx[j];
+ data[i++] = pf->stats.priority_xoff_rx[j];
+ }
}
/**
link_info = &vsi->port_info->phy.link_info;
- /* Initialize supported and advertised settings based on phy settings */
+ /* Initialize supported and advertised settings based on PHY settings */
switch (link_info->phy_type_low) {
case ICE_PHY_TYPE_LOW_100BASE_TX:
ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
struct net_device __always_unused *netdev)
{
/* link is down and the driver needs to fall back on
- * supported phy types to figure out what info to display
+ * supported PHY types to figure out what info to display
*/
ice_phy_type_to_ethtool(netdev, ks);
} else {
/* If autoneg is currently enabled */
if (p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) {
- /* If autoneg is supported 10GBASE_T is the only phy
+ /* If autoneg is supported 10GBASE_T is the only PHY
* that can disable it, so otherwise return error
*/
if (ethtool_link_ksettings_test_link_mode(ks,
if (!p)
return -EOPNOTSUPP;
- /* Check if this is lan vsi */
+ /* Check if this is LAN VSI */
ice_for_each_vsi(pf, idx)
if (pf->vsi[idx]->type == ICE_VSI_PF) {
if (np->vsi != pf->vsi[idx])
if (!abilities)
return -ENOMEM;
- /* Get the current phy config */
+ /* Get the current PHY config */
status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_SW_CFG, abilities,
NULL);
if (status) {
}
/**
- * ice_get_rxnfc - command to get RX flow classification rules
+ * ice_get_rxnfc - command to get Rx flow classification rules
* @netdev: network interface device structure
* @cmd: ethtool rxnfc command
* @rule_locs: buffer to rturn Rx flow classification rules
struct ice_port_info *pi = np->vsi->port_info;
struct ice_aqc_get_phy_caps_data *pcaps;
struct ice_vsi *vsi = np->vsi;
+ struct ice_dcbx_cfg *dcbx_cfg;
enum ice_status status;
/* Initialize pause params */
pause->rx_pause = 0;
pause->tx_pause = 0;
+ dcbx_cfg = &pi->local_dcbx_cfg;
+
pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps),
GFP_KERNEL);
if (!pcaps)
return;
- /* Get current phy config */
+ /* Get current PHY config */
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
NULL);
if (status)
pause->autoneg = ((pcaps->caps & ICE_AQC_PHY_AN_MODE) ?
AUTONEG_ENABLE : AUTONEG_DISABLE);
+ if (dcbx_cfg->pfc.pfcena)
+ /* PFC enabled so report LFC as off */
+ goto out;
+
if (pcaps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
pause->tx_pause = 1;
if (pcaps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_link_status *hw_link_info;
struct ice_pf *pf = np->vsi->back;
+ struct ice_dcbx_cfg *dcbx_cfg;
struct ice_vsi *vsi = np->vsi;
struct ice_hw *hw = &pf->hw;
struct ice_port_info *pi;
pi = vsi->port_info;
hw_link_info = &pi->phy.link_info;
+ dcbx_cfg = &pi->local_dcbx_cfg;
link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
/* Changing the port's flow control is not supported if this isn't the
netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
}
+ if (dcbx_cfg->pfc.pfcena) {
+ netdev_info(netdev, "Priority flow control enabled. Cannot set link flow control.\n");
+ return -EOPNOTSUPP;
+ }
if (pause->rx_pause && pause->tx_pause)
pi->fc.req_mode = ICE_FC_FULL;
else if (pause->rx_pause && !pause->tx_pause)
* @key: hash key
* @hfunc: hash function
*
- * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * Returns -EINVAL if the table specifies an invalid queue ID, otherwise
* returns 0 after programming the table.
*/
static int
/**
* ice_get_rc_coalesce - get ITR values for specific ring container
* @ec: ethtool structure to fill with driver's coalesce settings
- * @c_type: container type, RX or TX
+ * @c_type: container type, Rx or Tx
* @rc: ring container that the ITR values will come from
*
* Query the device for ice_ring_container specific ITR values. This is
/**
* ice_set_rc_coalesce - set ITR values for specific ring container
- * @c_type: container type, RX or TX
+ * @c_type: container type, Rx or Tx
* @ec: ethtool structure from user to update ITR settings
* @rc: ring container that the ITR values will come from
* @vsi: VSI associated to the ring container
#define PF_MBX_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0)
#define PF_MBX_ATQLEN_ATQENABLE_M BIT(31)
#define PF_MBX_ATQT 0x0022E300
+#define PRTDCB_GENS 0x00083020
+#define PRTDCB_GENS_DCBX_STATUS_S 0
+#define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0)
#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256))
#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0
#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0)
#define GLPRT_PTC64L(_i) (0x00380B80 + ((_i) * 8))
#define GLPRT_PTC9522H(_i) (0x00380D04 + ((_i) * 8))
#define GLPRT_PTC9522L(_i) (0x00380D00 + ((_i) * 8))
+#define GLPRT_PXOFFRXC(_i, _j) (0x00380500 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXOFFTXC(_i, _j) (0x00380F40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONRXC(_i, _j) (0x00380300 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONTXC(_i, _j) (0x00380D40 + ((_i) * 8 + (_j) * 64))
#define GLPRT_RFC(_i) (0x00380AC0 + ((_i) * 8))
#define GLPRT_RJC(_i) (0x00380B00 + ((_i) * 8))
#define GLPRT_RLEC(_i) (0x00380140 + ((_i) * 8))
#define GLPRT_ROC(_i) (0x00380240 + ((_i) * 8))
#define GLPRT_RUC(_i) (0x00380200 + ((_i) * 8))
+#define GLPRT_RXON2OFFCNT(_i, _j) (0x00380700 + ((_i) * 8 + (_j) * 64))
#define GLPRT_TDOLD(_i) (0x00381280 + ((_i) * 8))
#define GLPRT_UPRCH(_i) (0x00381304 + ((_i) * 8))
#define GLPRT_UPRCL(_i) (0x00381300 + ((_i) * 8))
} lo_dword;
union {
__le32 rss; /* RSS Hash */
- __le32 fd_id; /* Flow Director filter id */
+ __le32 fd_id; /* Flow Director filter ID */
} hi_dword;
} qword0;
struct {
ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3,
};
-/* RX Flex Descriptor
+/* Rx Flex Descriptor
* This descriptor is used instead of the legacy version descriptor when
* ice_rlan_ctx.adv_desc is set
*/
} read;
struct {
/* Qword 0 */
- u8 rxdid; /* descriptor builder profile id */
+ u8 rxdid; /* descriptor builder profile ID */
u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
__le16 pkt_len; /* [15:14] are reserved */
/* Rx Flex Descriptor NIC Profile
* This descriptor corresponds to RxDID 2 which contains
- * metadata fields for RSS, flow id and timestamp info
+ * metadata fields for RSS, flow ID and timestamp info
*/
struct ice_32b_rx_flex_desc_nic {
/* Qword 0 */
ICE_RX_MDID_HASH_HIGH,
};
-/* RX/TX Flag64 packet flag bits */
+/* Rx/Tx Flag64 packet flag bits */
enum ice_flg64_bits {
ICE_FLG_PKT_DSI = 0,
ICE_FLG_EVLAN_x8100 = 15,
ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS = 2,
};
-/* TX Descriptor */
+/* Tx Descriptor */
struct ice_tx_desc {
__le64 buf_addr; /* Address of descriptor's data buf */
__le64 cmd_type_offset_bsz;
#include "ice.h"
#include "ice_lib.h"
+#include "ice_dcb_lib.h"
/**
* ice_setup_rx_ctx - Configure a receive ring context
regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
QRXFLXP_CNTXT_RXDID_IDX_M;
- /* increasing context priority to pick up profile id;
+ /* increasing context priority to pick up profile ID;
* default is 0x01; setting to 0x03 to ensure profile
* is programming if prev context is of same priority
*/
/* Transmit Queue Length */
tlan_ctx->qlen = ring->count;
+ ice_set_cgd_num(tlan_ctx, ring);
+
/* PF number */
tlan_ctx->pf_num = hw->pf_id;
tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
break;
case ICE_VSI_VF:
- /* Firmware expects vmvf_num to be absolute VF id */
+ /* Firmware expects vmvf_num to be absolute VF ID */
tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
break;
/**
* ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
* @vsi: the VSI being configured
- * @vf_id: Id of the VF being configured
+ * @vf_id: ID of the VF being configured
*
* Return 0 on success and a negative value on error
*/
* ice_vsi_alloc - Allocates the next available struct VSI in the PF
* @pf: board private structure
* @type: type of VSI
- * @vf_id: Id of the VF being configured
+ * @vf_id: ID of the VF being configured
*
* returns a pointer to a VSI on success, NULL on failure.
*/
* through the MSI-X enabling code. On a constrained vector budget, we map Tx
* and Rx rings to the vector as "efficiently" as possible.
*/
+#ifdef CONFIG_DCB
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+#else
static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+#endif /* CONFIG_DCB */
{
int q_vectors = vsi->num_q_vectors;
int tx_rings_rem, rx_rings_rem;
}
/**
- * ice_add_mac_to_list - Add a mac address filter entry to the list
+ * ice_add_mac_to_list - Add a MAC address filter entry to the list
* @vsi: the VSI to be forwarded to
* @add_list: pointer to the list which contains MAC filter entries
* @macaddr: the MAC address to be added.
*
- * Adds mac address filter entry to the temp list
+ * Adds MAC address filter entry to the temp list
*
* Returns 0 on success or ENOMEM on failure.
*/
/**
* ice_vsi_add_vlan - Add VSI membership for given VLAN
* @vsi: the VSI being configured
- * @vid: VLAN id to be added
+ * @vid: VLAN ID to be added
*/
int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
{
/**
* ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
* @vsi: the VSI being configured
- * @vid: VLAN id to be removed
+ * @vid: VLAN ID to be removed
*
* Returns 0 on success and negative on failure
*/
* ice_vsi_stop_tx_rings - Disable Tx rings
* @vsi: the VSI being configured
* @rst_src: reset source
- * @rel_vmvf_num: Relative id of VF/VM
+ * @rel_vmvf_num: Relative ID of VF/VM
* @rings: Tx ring array to be stopped
* @offset: offset within vsi->txq_map
*/
* ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
* @vsi: the VSI being configured
* @rst_src: reset source
- * @rel_vmvf_num: Relative id of VF/VM
+ * @rel_vmvf_num: Relative ID of VF/VM
*/
int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
return -EIO;
}
+/**
+ * ice_vsi_set_tc_cfg - seed the VSI TC settings from the port DCB config
+ * @vsi: the VSI being configured
+ *
+ * Fills vsi->tc_cfg.ena_tc and vsi->tc_cfg.numtc with the values the DCB
+ * helpers report for the local DCBX configuration of the VSI's port.
+ */
+static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+	struct ice_port_info *pi = vsi->port_info;
+
+	vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(&pi->local_dcbx_cfg);
+	vsi->tc_cfg.numtc = ice_dcb_get_num_tc(&pi->local_dcbx_cfg);
+}
+
/**
* ice_vsi_setup - Set up a VSI by a given type
* @pf: board private structure
* @pi: pointer to the port_info instance
* @type: VSI type
- * @vf_id: defines VF id to which this VSI connects. This field is meant to be
+ * @vf_id: defines VF ID to which this VSI connects. This field is meant to be
* used only for ICE_VSI_VF VSI type. For other VSI types, should
* fill-in ICE_INVAL_VFID as input.
*
/* set RSS capabilities */
ice_vsi_set_rss_params(vsi);
- /* set tc configuration */
+ /* set TC configuration */
ice_vsi_set_tc_cfg(vsi);
/* create the VSI */
test_bit(__ICE_CORER_REQ, state) ||
test_bit(__ICE_GLOBR_REQ, state);
}
+
+#ifdef CONFIG_DCB
+/**
+ * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
+ * @vsi: VSI being configured
+ * @ctx: the context buffer returned from AQ VSI update command
+ *
+ * Copies the queue mapping, the TC mapping and the mapping flags from
+ * ctx->info into vsi->info. No other field of the VSI info is written.
+ */
+static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
+{
+	/* copy lengths are taken from the destination fields */
+	const size_t q_map_sz = sizeof(vsi->info.q_mapping);
+	const size_t tc_map_sz = sizeof(vsi->info.tc_mapping);
+
+	memcpy(&vsi->info.q_mapping, &ctx->info.q_mapping, q_map_sz);
+	memcpy(&vsi->info.tc_mapping, ctx->info.tc_mapping, tc_map_sz);
+	vsi->info.mapping_flags = ctx->info.mapping_flags;
+}
+
+/**
+ * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @ena_tc: TC map to be enabled
+ *
+ * Does nothing for a VSI without a netdev. An empty @ena_tc resets the
+ * netdev TC state. Otherwise the netdev TC count is set from
+ * vsi->tc_cfg.numtc, the queue range of every TC enabled in
+ * vsi->tc_cfg.ena_tc is programmed, and each user priority is mapped to the
+ * netdev TC of the TC that the local DCBX ETS priority table assigns to it.
+ * If setting the TC count fails, the function returns without programming
+ * anything further.
+ */
+static void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_dcbx_cfg *dcbcfg;
+	int tc, up;
+
+	if (!netdev)
+		return;
+
+	if (!ena_tc) {
+		netdev_reset_tc(netdev);
+		return;
+	}
+
+	if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc))
+		return;
+
+	/* program the Tx queue range of each enabled TC */
+	ice_for_each_traffic_class(tc) {
+		if (vsi->tc_cfg.ena_tc & BIT(tc)) {
+			netdev_set_tc_queue(netdev,
+					    vsi->tc_cfg.tc_info[tc].netdev_tc,
+					    vsi->tc_cfg.tc_info[tc].qcount_tx,
+					    vsi->tc_cfg.tc_info[tc].qoffset);
+		}
+	}
+
+	/* the priority table is read from the PF's port, not vsi->port_info */
+	dcbcfg = &vsi->back->hw.port_info->local_dcbx_cfg;
+
+	for (up = 0; up < ICE_MAX_USER_PRIORITY; up++) {
+		u8 ets_tc = dcbcfg->etscfg.prio_table[up];
+
+		/* map the UP to the netdev TC# of its ETS TC */
+		netdev_set_prio_tc_map(netdev, up,
+				       vsi->tc_cfg.tc_info[ets_tc].netdev_tc);
+	}
+}
+
+/**
+ * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
+ * @vsi: VSI to be configured
+ * @ena_tc: TC bitmap
+ *
+ * VSI queues expected to be quiesced before calling this function
+ *
+ * Returns 0 on success, -ENOMEM if the temporary VSI context cannot be
+ * allocated, or -EIO if the VSI update or the LAN queue configuration fails.
+ */
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_vsi_ctx *ctx;
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	int i, ret = 0;
+	u8 num_tc = 0;
+
+	/* Allocate the scratch context before touching vsi->tc_cfg so that an
+	 * allocation failure leaves the cached TC configuration unchanged.
+	 */
+	ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ice_for_each_traffic_class(i) {
+		/* count the enabled TCs */
+		if (ena_tc & BIT(i))
+			num_tc++;
+		/* populate max_txqs per TC */
+		max_txqs[i] = pf->num_lan_tx;
+	}
+
+	vsi->tc_cfg.ena_tc = ena_tc;
+	vsi->tc_cfg.numtc = num_tc;
+
+	ctx->vf_num = 0;
+	ctx->info = vsi->info;
+
+	ice_vsi_setup_q_map(vsi, ctx);
+
+	/* must indicate which sections of the VSI context are being modified */
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
+	if (status) {
+		dev_info(&pf->pdev->dev, "Failed VSI Update\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"VSI %d failed TC config, error %d\n",
+			vsi->vsi_num, status);
+		ret = -EIO;
+		goto out;
+	}
+	/* both steps succeeded: sync the cached VSI info with the new map */
+	ice_vsi_update_q_map(vsi, ctx);
+	vsi->info.valid_sections = 0;
+
+	ice_vsi_cfg_netdev_tc(vsi, ena_tc);
+out:
+	devm_kfree(&pf->pdev->dev, ctx);
+	return ret;
+}
+#endif /* CONFIG_DCB */
int ice_vsi_clear(struct ice_vsi *vsi);
+#ifdef CONFIG_DCB
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
+#endif /* CONFIG_DCB */
+
struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
enum ice_vsi_type type, u16 vf_id);
void ice_vsi_put_qs(struct ice_vsi *vsi);
+#ifdef CONFIG_DCB
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+#endif /* CONFIG_DCB */
+
void ice_vsi_dis_irq(struct ice_vsi *vsi);
void ice_vsi_free_irq(struct ice_vsi *vsi);
#include "ice.h"
#include "ice_lib.h"
+#include "ice_dcb_lib.h"
-#define DRV_VERSION "0.7.2-k"
+#define DRV_VERSION "0.7.4-k"
#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
const char ice_drv_ver[] = DRV_VERSION;
static const char ice_driver_string[] = DRV_SUMMARY;
static struct workqueue_struct *ice_wq;
static const struct net_device_ops ice_netdev_ops;
-static void ice_pf_dis_all_vsi(struct ice_pf *pf);
static void ice_rebuild(struct ice_pf *pf);
static void ice_vsi_release_all(struct ice_pf *pf);
}
/**
- * ice_add_mac_to_sync_list - creates list of mac addresses to be synced
+ * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
* @netdev: the net device on which the sync is happening
- * @addr: mac address to sync
+ * @addr: MAC address to sync
*
* This is a callback function which is called by the in kernel device sync
* functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
* populates the tmp_sync_list, which is later used by ice_add_mac to add the
- * mac filters from the hardware.
+ * MAC filters from the hardware.
*/
static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
{
}
/**
- * ice_add_mac_to_unsync_list - creates list of mac addresses to be unsynced
+ * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
* @netdev: the net device on which the unsync is happening
- * @addr: mac address to unsync
+ * @addr: MAC address to unsync
*
* This is a callback function which is called by the in kernel device unsync
* functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
* populates the tmp_unsync_list, which is later used by ice_remove_mac to
- * delete the mac filters from the hardware.
+ * delete the MAC filters from the hardware.
*/
static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
{
netif_addr_unlock_bh(netdev);
}
- /* Remove mac addresses in the unsync list */
+ /* Remove MAC addresses in the unsync list */
status = ice_remove_mac(hw, &vsi->tmp_unsync_list);
ice_free_fltr_list(dev, &vsi->tmp_unsync_list);
if (status) {
}
}
- /* Add mac addresses in the sync list */
+ /* Add MAC addresses in the sync list */
status = ice_add_mac(hw, &vsi->tmp_sync_list);
ice_free_fltr_list(dev, &vsi->tmp_sync_list);
/* If filter is added successfully or already exists, do not go into
*/
if (status && status != ICE_ERR_ALREADY_EXISTS) {
netdev_err(netdev, "Failed to add MAC filters\n");
- /* If there is no more space for new umac filters, vsi
+ /* If there is no more space for new umac filters, VSI
* should go into promiscuous mode. There should be some
* space reserved for promiscuous filters.
*/
test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
if (vsi->current_netdev_flags & IFF_PROMISC) {
- /* Apply TX filter rule to get traffic from VMs */
+ /* Apply Tx filter rule to get traffic from VMs */
status = ice_cfg_dflt_vsi(hw, vsi->idx, true,
ICE_FLTR_TX);
if (status) {
err = -EIO;
goto out_promisc;
}
- /* Apply RX filter rule to get traffic from wire */
+ /* Apply Rx filter rule to get traffic from wire */
status = ice_cfg_dflt_vsi(hw, vsi->idx, true,
ICE_FLTR_RX);
if (status) {
goto out_promisc;
}
} else {
- /* Clear TX filter rule to stop traffic from VMs */
+ /* Clear Tx filter rule to stop traffic from VMs */
status = ice_cfg_dflt_vsi(hw, vsi->idx, false,
ICE_FLTR_TX);
if (status) {
err = -EIO;
goto out_promisc;
}
- /* Clear RX filter to remove traffic from wire */
+ /* Clear Rx filter to remove traffic from wire */
status = ice_cfg_dflt_vsi(hw, vsi->idx, false,
ICE_FLTR_RX);
if (status) {
}
}
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ *
+ * A VSI that is already down is left alone. Otherwise the VSI is flagged as
+ * needing a restart and, when it is a PF VSI with a netdev, it is stopped
+ * through ndo_stop if the interface is running or closed directly if not.
+ */
+static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+	struct net_device *netdev;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	/* only a PF VSI backed by a netdev is brought down here */
+	netdev = vsi->netdev;
+	if (vsi->type != ICE_VSI_PF || !netdev)
+		return;
+
+	if (!netif_running(netdev)) {
+		ice_vsi_close(vsi);
+		return;
+	}
+
+	/* take the rtnl_lock around ndo_stop unless the caller holds it */
+	if (!locked)
+		rtnl_lock();
+	netdev->netdev_ops->ndo_stop(netdev);
+	if (!locked)
+		rtnl_unlock();
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ * @locked: is the rtnl_lock already held
+ *
+ * Calls ice_dis_vsi() on every populated slot of pf->vsi, forwarding
+ * @locked unchanged. The function has external linkage only when
+ * CONFIG_DCB is set.
+ */
+#ifdef CONFIG_DCB
+void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+#else
+static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+#endif /* CONFIG_DCB */
+{
+	int idx;
+
+	ice_for_each_vsi(pf, idx) {
+		struct ice_vsi *vsi = pf->vsi[idx];
+
+		/* skip empty slots */
+		if (vsi)
+			ice_dis_vsi(vsi, locked);
+	}
+}
+
/**
* ice_prepare_for_reset - prep for the core to reset
* @pf: board private structure
ice_vc_notify_reset(pf);
/* disable the VSIs and their queues that are not already DOWN */
- ice_pf_dis_all_vsi(pf);
+ ice_pf_dis_all_vsi(pf, false);
if (hw->port_info)
ice_sched_clear_port(hw->port_info);
pf->hw.reset_ongoing = false;
ice_rebuild(pf);
/* clear bit to resume normal operations, but
- * ICE_NEEDS_RESTART bit is set incase rebuild failed
+ * ICE_NEEDS_RESTART bit is set in case rebuild failed
*/
clear_bit(__ICE_RESET_OICR_RECV, pf->state);
clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
}
/**
- * ice_vsi_link_event - update the vsi's netdev
- * @vsi: the vsi on which the link event occurred
- * @link_up: whether or not the vsi needs to be set up or down
+ * ice_vsi_link_event - update the VSI's netdev
+ * @vsi: the VSI on which the link event occurred
+ * @link_up: whether or not the VSI needs to be set up or down
*/
static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
{
case ice_aqc_opc_fw_logging:
ice_output_fw_log(hw, &event.desc, event.msg_buf);
break;
+ case ice_aqc_opc_lldp_set_mib_change:
+ ice_dcb_process_lldp_set_mib_change(pf, &event);
+ break;
default:
dev_dbg(&pf->pdev->dev,
"%s Receive Queue unknown event 0x%04x ignored\n",
/**
* ice_set_ctrlq_len - helper function to set controlq length
- * @hw: pointer to the hw instance
+ * @hw: pointer to the HW instance
*/
static void ice_set_ctrlq_len(struct ice_hw *hw)
{
}
/**
- * ice_vlan_rx_add_vid - Add a vlan id filter to HW offload
+ * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
* @netdev: network interface to be adjusted
* @proto: unused protocol
- * @vid: vlan id to be added
+ * @vid: VLAN ID to be added
*
- * net_device_ops implementation for adding vlan ids
+ * net_device_ops implementation for adding VLAN IDs
*/
static int
ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
return ret;
}
- /* Add all VLAN ids including 0 to the switch filter. VLAN id 0 is
+ /* Add all VLAN IDs including 0 to the switch filter. VLAN ID 0 is
* needed to continue allowing all untagged packets since VLAN prune
* list is applied to all packets by the switch
*/
}
/**
- * ice_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
+ * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
* @netdev: network interface to be adjusted
* @proto: unused protocol
- * @vid: vlan id to be removed
+ * @vid: VLAN ID to be removed
*
- * net_device_ops implementation for removing vlan ids
+ * net_device_ops implementation for removing VLAN IDs
*/
static int
ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
ice_init_pf(pf);
+ err = ice_init_pf_dcb(pf);
+ if (err) {
+ clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+
+ /* do not fail overall init if DCB init fails */
+ err = 0;
+ }
+
ice_determine_q_usage(pf);
pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
module_exit(ice_module_exit);
/**
- * ice_set_mac_address - NDO callback to set mac address
+ * ice_set_mac_address - NDO callback to set MAC address
* @netdev: network interface device structure
* @pi: pointer to an address structure
*
return -EBUSY;
}
- /* When we change the mac address we also have to change the mac address
- * based filter rules that were created previously for the old mac
+ /* When we change the MAC address we also have to change the MAC address
+ * based filter rules that were created previously for the old MAC
* address. So first, we remove the old filter rule using ice_remove_mac
* and then create a new filter rule using ice_add_mac. Note that for
- * both these operations, we first need to form a "list" of mac
- * addresses (even though in this case, we have only 1 mac address to be
+ * both these operations, we first need to form a "list" of MAC
+ * addresses (even though in this case, we have only 1 MAC address to be
* added/removed) and this done using ice_add_mac_to_list. Depending on
- * the ensuing operation this "list" of mac addresses is either to be
+ * the ensuing operation this "list" of MAC addresses is either to be
* added or removed from the filter.
*/
err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr);
return err;
}
- /* change the netdev's mac address */
+ /* change the netdev's MAC address */
memcpy(netdev->dev_addr, mac, netdev->addr_len);
netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
netdev->dev_addr);
- /* write new mac address to the firmware */
+ /* write new MAC address to the firmware */
flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
if (status) {
* @tb: pointer to array of nladdr (unused)
* @dev: the net device pointer
* @addr: the MAC address entry being added
- * @vid: VLAN id
+ * @vid: VLAN ID
* @flags: instructions from stack about fdb operation
* @extack: netlink extended ack
*/
* @tb: pointer to array of nladdr (unused)
* @dev: the net device pointer
* @addr: the MAC address entry being added
- * @vid: VLAN id
+ * @vid: VLAN ID
*/
static int
ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
}
/**
- * ice_vsi_vlan_setup - Setup vlan offload properties on a VSI
- * @vsi: VSI to setup vlan properties for
+ * ice_vsi_vlan_setup - Setup VLAN offload properties on a VSI
+ * @vsi: VSI to setup VLAN properties for
*/
static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
{
if (err)
return err;
}
+ ice_vsi_cfg_dcb_rings(vsi);
err = ice_vsi_cfg_lan_txqs(vsi);
if (!err)
ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded,
&prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
+ ice_update_dcb_stats(pf);
+
ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded,
&prev_ps->crc_errors, &cur_ps->crc_errors);
}
/**
- * ice_dis_vsi - pause a VSI
- * @vsi: the VSI being paused
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resumed
* @locked: is the rtnl_lock already held
*/
-static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+static int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
{
- if (test_bit(__ICE_DOWN, vsi->state))
- return;
+ int err = 0;
- set_bit(__ICE_NEEDS_RESTART, vsi->state);
+ if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+ return err;
+
+ clear_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+ if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ struct net_device *netd = vsi->netdev;
- if (vsi->type == ICE_VSI_PF && vsi->netdev) {
if (netif_running(vsi->netdev)) {
- if (!locked) {
+ if (locked) {
+ err = netd->netdev_ops->ndo_open(netd);
+ } else {
rtnl_lock();
- vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
+ err = netd->netdev_ops->ndo_open(netd);
rtnl_unlock();
- } else {
- vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
}
- } else {
- ice_vsi_close(vsi);
- }
- }
-}
-
-/**
- * ice_ena_vsi - resume a VSI
- * @vsi: the VSI being resume
- */
-static int ice_ena_vsi(struct ice_vsi *vsi)
-{
- int err = 0;
-
- if (test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state) &&
- vsi->netdev) {
- if (netif_running(vsi->netdev)) {
- rtnl_lock();
- err = vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
- rtnl_unlock();
} else {
err = ice_vsi_open(vsi);
}
return err;
}
-/**
- * ice_pf_dis_all_vsi - Pause all VSIs on a PF
- * @pf: the PF
- */
-static void ice_pf_dis_all_vsi(struct ice_pf *pf)
-{
- int v;
-
- ice_for_each_vsi(pf, v)
- if (pf->vsi[v])
- ice_dis_vsi(pf->vsi[v], false);
-}
-
/**
* ice_pf_ena_all_vsi - Resume all VSIs on a PF
* @pf: the PF
+ * @locked: is the rtnl_lock already held
*/
-static int ice_pf_ena_all_vsi(struct ice_pf *pf)
+#ifdef CONFIG_DCB
+int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
+#else
+static int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
+#endif /* CONFIG_DCB */
{
int v;
ice_for_each_vsi(pf, v)
if (pf->vsi[v])
- if (ice_ena_vsi(pf->vsi[v]))
+ if (ice_ena_vsi(pf->vsi[v], locked))
return -EIO;
return 0;
if (err)
goto err_sched_init_port;
+ ice_dcb_rebuild(pf);
+
/* reset search_hint of irq_trackers to 0 since interrupts are
* reclaimed and could be allocated from beginning during VSI rebuild
*/
}
/* restart the VSIs that were rebuilt and running before the reset */
- err = ice_pf_ena_all_vsi(pf);
+ err = ice_pf_ena_all_vsi(pf, false);
if (err) {
dev_err(&pf->pdev->dev, "error enabling VSIs\n");
/* no need to disable VSIs in tear down path in ice_rebuild()
/**
* ice_bridge_getlink - Get the hardware bridge mode
* @skb: skb buff
- * @pid: process id
+ * @pid: process ID
* @seq: RTNL message seq
* @dev: the netdev being configured
* @filter_mask: filter mask passed in
/**
* ice_aq_read_nvm
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @module_typeid: module pointer location in words from the NVM beginning
* @offset: byte offset from the module beginning
* @length: length of the section to be read (in bytes from the offset)
/**
* ice_init_nvm - initializes NVM setting
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* This function reads and populates NVM settings such as Shadow RAM size,
* max_timeout, and blank_nvm_mode
u32 fla, gens_stat;
u8 sr_size;
- /* The SR size is stored regardless of the nvm programming mode
+ /* The SR size is stored regardless of the NVM programming mode
* as the blank mode may be used in the factory line.
*/
gens_stat = rd32(hw, GLNVM_GENS);
/**
* ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
* @start_node: pointer to the starting ice_sched_node struct in a sub-tree
- * @teid: node teid to search
+ * @teid: node TEID to search
*
- * This function searches for a node matching the teid in the scheduling tree
+ * This function searches for a node matching the TEID in the scheduling tree
* from the SW DB. The search is recursive and is restricted by the number of
* layers it has searched through; stopping at the max supported layer.
*
start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
return NULL;
- /* Check if teid matches to any of the children nodes */
+ /* Check if TEID matches to any of the children nodes */
for (i = 0; i < start_node->num_children; i++)
if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
return start_node->children[i];
/**
* ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @cmd_opc: cmd opcode
* @elems_req: number of elements to request
* @buf: pointer to buffer
/**
* ice_aq_query_sched_elems - query scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @elems_req: number of elements to query
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
*
* Query scheduling elements (0x0404)
*/
-static enum ice_status
+enum ice_status
ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
struct ice_aqc_get_elem *buf, u16 buf_size,
u16 *elems_ret, struct ice_sq_cd *cd)
elems_ret, cd);
}
-/**
- * ice_sched_query_elem - query element information from hw
- * @hw: pointer to the hw struct
- * @node_teid: node teid to be queried
- * @buf: buffer to element information
- *
- * This function queries HW element information
- */
-static enum ice_status
-ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
- struct ice_aqc_get_elem *buf)
-{
- u16 buf_size, num_elem_ret = 0;
- enum ice_status status;
-
- buf_size = sizeof(*buf);
- memset(buf, 0, buf_size);
- buf->generic[0].node_teid = cpu_to_le32(node_teid);
- status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
- NULL);
- if (status || num_elem_ret != 1)
- ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
- return status;
-}
-
/**
* ice_sched_add_node - Insert the Tx scheduler node in SW DB
* @pi: port information structure
/**
* ice_aq_delete_sched_elems - delete scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @grps_req: number of groups to delete
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
}
/**
- * ice_sched_remove_elems - remove nodes from hw
- * @hw: pointer to the hw struct
+ * ice_sched_remove_elems - remove nodes from HW
+ * @hw: pointer to the HW struct
* @parent: pointer to the parent node
* @num_nodes: number of nodes
* @node_teids: array of node teids to be deleted
*
- * This function remove nodes from hw
+ * This function remove nodes from HW
*/
static enum ice_status
ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
/**
* ice_sched_get_first_node - get the first node of the given layer
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @parent: pointer the base node of the subtree
* @layer: layer number
*
/**
* ice_aq_get_dflt_topo - gets default scheduler topology
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @lport: logical port number
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
/**
* ice_aq_add_sched_elems - adds scheduling element
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @grps_req: the number of groups that are requested to be added
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
/**
* ice_aq_suspend_sched_elems - suspend scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @elems_req: number of elements to suspend
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
/**
* ice_aq_resume_sched_elems - resume scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @elems_req: number of elements to resume
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
/**
* ice_aq_query_sched_res - query scheduler resource
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @buf_size: buffer size in bytes
* @buf: pointer to buffer
* @cd: pointer to command details structure or NULL
}
/**
- * ice_sched_suspend_resume_elems - suspend or resume hw nodes
- * @hw: pointer to the hw struct
+ * ice_sched_suspend_resume_elems - suspend or resume HW nodes
+ * @hw: pointer to the HW struct
* @num_nodes: number of nodes
* @node_teids: array of node teids to be suspended or resumed
* @suspend: true means suspend / false means resume
*
- * This function suspends or resumes hw nodes
+ * This function suspends or resumes HW nodes
*/
static enum ice_status
ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
}
/**
- * ice_sched_clear_agg - clears the agg related information
+ * ice_sched_clear_agg - clears the aggregator related information
* @hw: pointer to the hardware structure
*
- * This function removes agg list and free up agg related memory
+ * This function removes aggregator list and frees up aggregator related memory
* previously allocated.
*/
void ice_sched_clear_agg(struct ice_hw *hw)
/**
* ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Cleanup scheduling elements from SW DB for all the ports
*/
}
/**
- * ice_sched_add_elems - add nodes to hw and SW DB
+ * ice_sched_add_elems - add nodes to HW and SW DB
* @pi: port information structure
* @tc_node: pointer to the branch node
* @parent: pointer to the parent node
* @layer: layer number to add nodes
* @num_nodes: number of nodes
* @num_nodes_added: pointer to num nodes added
- * @first_node_teid: if new nodes are added then return the teid of first node
+ * @first_node_teid: if new nodes are added then return the TEID of first node
*
- * This function add nodes to hw as well as to SW DB for a given layer
+ * This function adds nodes to HW as well as to SW DB for a given layer
*/
static enum ice_status
ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
* @parent: pointer to parent node
* @layer: layer number to add nodes
* @num_nodes: number of nodes to be added
- * @first_node_teid: pointer to the first node teid
+ * @first_node_teid: pointer to the first node TEID
* @num_nodes_added: pointer to number of nodes added
*
* This function add nodes to a given layer.
*num_nodes_added += num_added;
}
- /* Don't modify the first node teid memory if the first node was
+ /* Don't modify the first node TEID memory if the first node was
* added already in the above call. Instead send some temp
* memory for all other recursive calls.
*/
/**
* ice_sched_get_qgrp_layer - get the current queue group layer number
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* This function returns the current queue group layer number
*/
/**
* ice_sched_get_vsi_layer - get the current VSI layer number
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* This function returns the current VSI layer number
*/
* 7 4
* 5 or less sw_entry_point_layer
*/
- /* calculate the vsi layer based on number of layers. */
+ /* calculate the VSI layer based on number of layers. */
if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
goto err_init_port;
}
- /* If the last node is a leaf node then the index of the Q group
+ /* If the last node is a leaf node then the index of the queue group
* layer is two less than the number of elements.
*/
if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
/**
* ice_sched_find_node_in_subtree - Find node in part of base node subtree
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @base: pointer to the base node
* @node: pointer to the node to search
*
}
/**
- * ice_sched_get_free_qparent - Get a free lan or rdma q group node
+ * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
* @pi: port information structure
* @vsi_handle: software VSI handle
* @tc: branch number
- * @owner: lan or rdma
+ * @owner: LAN or RDMA
*
- * This function retrieves a free lan or rdma q group node
+ * This function retrieves a free LAN or RDMA queue group node
*/
struct ice_sched_node *
ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
if (!vsi_ctx)
return NULL;
vsi_node = vsi_ctx->sched.vsi_node[tc];
- /* validate invalid VSI id */
+ /* bail out if the VSI has no node on this TC */
if (!vsi_node)
goto lan_q_exit;
- /* get the first q group node from VSI sub-tree */
+ /* get the first queue group node from VSI sub-tree */
qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
while (qgrp_node) {
/* make sure the qgroup node is part of the VSI subtree */
}
/**
- * ice_sched_get_vsi_node - Get a VSI node based on VSI id
- * @hw: pointer to the hw struct
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
+ * @hw: pointer to the HW struct
* @tc_node: pointer to the TC node
* @vsi_handle: software VSI handle
*
- * This function retrieves a VSI node for a given VSI id from a given
+ * This function retrieves a VSI node for a given VSI ID from a given
* TC branch
*/
static struct ice_sched_node *
/**
* ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @num_qs: number of queues
* @num_nodes: num nodes array
*
qgl = ice_sched_get_qgrp_layer(hw);
vsil = ice_sched_get_vsi_layer(hw);
- /* calculate num nodes from q group to VSI layer */
+ /* calculate num nodes from queue group to VSI layer */
for (i = qgl; i > vsil; i--) {
/* round to the next integer if there is a remainder */
num = DIV_ROUND_UP(num, hw->max_children[i]);
* @vsi_handle: software VSI handle
* @tc_node: pointer to the TC node
* @num_nodes: pointer to the num nodes that needs to be added per layer
- * @owner: node owner (lan or rdma)
+ * @owner: node owner (LAN or RDMA)
*
* This function adds the VSI child nodes to tree. It gets called for
- * lan and rdma separately.
+ * LAN and RDMA separately.
*/
static enum ice_status
ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
/**
* ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @tc_node: pointer to TC node
* @num_nodes: pointer to num nodes array
*
/* calculate number of supported nodes needed for this VSI */
ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
- /* add vsi supported nodes to tc subtree */
+ /* add VSI supported nodes to TC subtree */
return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
num_nodes);
}
* @vsi_handle: software VSI handle
* @tc: TC number
* @maxqs: max number of queues
- * @owner: lan or rdma
+ * @owner: LAN or RDMA
* @enable: TC enabled or disabled
*
* This function adds/updates VSI nodes based on the number of queues. If TC is
return ICE_ERR_PARAM;
vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
- /* suspend the VSI if tc is not enabled */
+ /* suspend the VSI if TC is not enabled */
if (!enable) {
if (vsi_node && vsi_node->in_use) {
u32 teid = le32_to_cpu(vsi_node->info.node_teid);
}
/**
- * ice_sched_rm_agg_vsi_entry - remove agg related VSI info entry
+ * ice_sched_rm_agg_vsi_entry - remove aggregator related VSI info entry
* @pi: port information structure
* @vsi_handle: software VSI handle
*
ice_free_sched_node(pi, vsi_node);
vsi_ctx->sched.vsi_node[i] = NULL;
- /* clean up agg related vsi info if any */
+ /* clean up aggregator related VSI info if any */
ice_sched_rm_agg_vsi_info(pi, vsi_handle);
}
if (owner == ICE_SCHED_NODE_OWNER_LAN)
};
/* FW AQ command calls */
+enum ice_status
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_get_elem *buf, u16 buf_size,
+ u16 *elems_ret, struct ice_sq_cd *cd);
enum ice_status ice_sched_init_port(struct ice_port_info *pi);
enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
void ice_sched_clear_port(struct ice_port_info *pi);
ICE_ERR_PARAM = -1,
ICE_ERR_NOT_IMPL = -2,
ICE_ERR_NOT_READY = -3,
+ ICE_ERR_NOT_SUPPORTED = -4,
ICE_ERR_BAD_PTR = -5,
ICE_ERR_INVAL_SIZE = -6,
ICE_ERR_DEVICE_NOT_SUPPORTED = -8,
* byte 6 = 0x2: to identify it as locally administered SA MAC
* byte 12 = 0x81 & byte 13 = 0x00:
* In case of VLAN filter first two bytes defines ether type (0x8100)
- * and remaining two bytes are placeholder for programming a given VLAN id
+ * and remaining two bytes are placeholder for programming a given VLAN ID
* In case of Ether type filter it is treated as header without VLAN tag
* and byte 12 and 13 is used to program a given Ether type instead
*/
/**
* ice_aq_alloc_free_res - command to allocate/free resources
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @num_entries: number of resource entries in buffer
* @buf: Indirect buffer to hold data parameters and response
* @buf_size: size of buffer for indirect commands
/**
* ice_init_def_sw_recp - initialize the recipe book keeping tables
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Allocate memory for the entire recipe table and initialize the structures/
* entries corresponding to basic recipes.
/**
* ice_aq_add_vsi
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_ctx: pointer to a VSI context struct
* @cd: pointer to command details structure or NULL
*
/**
* ice_aq_free_vsi
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_ctx: pointer to a VSI context struct
* @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
* @cd: pointer to command details structure or NULL
/**
* ice_aq_update_vsi
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_ctx: pointer to a VSI context struct
* @cd: pointer to command details structure or NULL
*
/**
* ice_is_vsi_valid - check whether the VSI is valid or not
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: VSI handle
*
* check whether the VSI is valid or not
}
/**
- * ice_get_hw_vsi_num - return the hw VSI number
- * @hw: pointer to the hw struct
+ * ice_get_hw_vsi_num - return the HW VSI number
+ * @hw: pointer to the HW struct
* @vsi_handle: VSI handle
*
- * return the hw VSI number
+ * return the HW VSI number
* Caution: call this function only if VSI is valid (ice_is_vsi_valid)
*/
u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle)
/**
* ice_get_vsi_ctx - return the VSI context entry for a given VSI handle
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: VSI handle
*
* return the VSI context entry for a given VSI handle
/**
* ice_save_vsi_ctx - save the VSI context for a given VSI handle
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: VSI handle
* @vsi: VSI context pointer
*
/**
* ice_clear_vsi_ctx - clear the VSI context entry
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: VSI handle
*
* clear the VSI context entry
/**
* ice_clear_all_vsi_ctx - clear all the VSI context entries
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*/
void ice_clear_all_vsi_ctx(struct ice_hw *hw)
{
/**
* ice_add_vsi - add VSI context to the hardware and VSI handle list
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: unique VSI handle provided by drivers
* @vsi_ctx: pointer to a VSI context struct
* @cd: pointer to command details structure or NULL
return status;
tmp_vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
if (!tmp_vsi_ctx) {
- /* Create a new vsi context */
+ /* Create a new VSI context */
tmp_vsi_ctx = devm_kzalloc(ice_hw_to_dev(hw),
sizeof(*tmp_vsi_ctx), GFP_KERNEL);
if (!tmp_vsi_ctx) {
/**
* ice_free_vsi- free VSI context from hardware and VSI handle list
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: unique VSI handle
* @vsi_ctx: pointer to a VSI context struct
* @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
/**
* ice_update_vsi
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle: unique VSI handle
* @vsi_ctx: pointer to a VSI context struct
* @cd: pointer to command details structure or NULL
/**
* ice_aq_alloc_free_vsi_list
- * @hw: pointer to the hw struct
- * @vsi_list_id: VSI list id returned or used for lookup
+ * @hw: pointer to the HW struct
+ * @vsi_list_id: VSI list ID returned or used for lookup
* @lkup_type: switch rule filter lookup type
* @opc: switch rules population command type - pass in the command opcode
*
/**
* ice_aq_sw_rules - add/update/remove switch rules
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @rule_list: pointer to switch rule population list
* @rule_list_sz: total size of the rule list in bytes
* @num_rules: number of switch rules in the rule_list
* 1. The switch is a VEB AND
* 2
* 2.1 The lookup is a directional lookup like ethertype,
- * promiscuous, ethertype-mac, promiscuous-vlan
+ * promiscuous, ethertype-MAC, promiscuous-VLAN
* and default-port OR
* 2.2 The lookup is VLAN, OR
* 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR
* @hw: pointer to the hardware structure
* @m_ent: the management entry for which sw marker needs to be added
* @sw_marker: sw marker to tag the Rx descriptor with
- * @l_id: large action resource id
+ * @l_id: large action resource ID
*
* Create a large action to hold software marker and update the switch rule
* entry pointed by m_ent with newly created large action
struct ice_aqc_sw_rules_elem *lg_act, *rx_tx;
/* For software marker we need 3 large actions
* 1. FWD action: FWD TO VSI or VSI LIST
- * 2. GENERIC VALUE action to hold the profile id
- * 3. GENERIC VALUE action to hold the software marker id
+ * 2. GENERIC VALUE action to hold the profile ID
+ * 3. GENERIC VALUE action to hold the software marker ID
*/
const u16 num_lg_acts = 3;
enum ice_status status;
ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
ice_aqc_opc_update_sw_rules);
- /* Update the action to point to the large action id */
+ /* Update the action to point to the large action ID */
rx_tx->pdata.lkup_tx_rx.act =
cpu_to_le32(ICE_SINGLE_ACT_PTR |
((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
ICE_SINGLE_ACT_PTR_VAL_M));
- /* Use the filter rule id of the previously created rule with single
+ /* Use the filter rule ID of the previously created rule with single
* act. Once the update happens, hardware will treat this as large
* action
*/
* @hw: pointer to the hardware structure
* @vsi_handle_arr: array of VSI handles to set in the VSI mapping
* @num_vsi: number of VSI handles in the array
- * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
*
- * Helper function to create a new entry of VSI list id to VSI mapping
- * using the given VSI list id
+ * Helper function to create a new entry of VSI list ID to VSI mapping
+ * using the given VSI list ID
*/
static struct ice_vsi_list_map_info *
ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
* @hw: pointer to the hardware structure
* @vsi_handle_arr: array of VSI handles to form a VSI list
* @num_vsi: number of VSI handles in the array
- * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
* @remove: Boolean value to indicate if this is a remove action
* @opc: switch rules population command type - pass in the command opcode
* @lkup_type: lookup type of the filter
*
* Call AQ command to add a new switch rule or update existing switch rule
- * using the given VSI list id
+ * using the given VSI list ID
*/
static enum ice_status
ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
/**
* ice_create_vsi_list_rule - Creates and populates a VSI list rule
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
* @vsi_handle_arr: array of VSI handles to form a VSI list
* @num_vsi: number of VSI handles in the array
* @vsi_list_id: stores the ID of the VSI list to be created
* @f_info: filter information for switch rule
*
* Call AQ command to update a previously created switch rule with a
- * VSI list id
+ * VSI list ID
*/
static enum ice_status
ice_update_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_info *f_info)
/**
* ice_update_sw_rule_bridge_mode
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Updates unicast switch filter rules based on VEB/VEPA mode
*/
* Allocate a new VSI list and add two VSIs
* to this list using switch rule command
* Update the previously created switch rule with the
- * newly created VSI list id
+ * newly created VSI list ID
* if a VSI list was previously created
* Add the new VSI to the previously created VSI list set
* using the update switch rule command
return 0;
/* Update the previously created VSI list set with
- * the new VSI id passed in
+ * the new VSI ID passed in
*/
vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
opcode = ice_aqc_opc_update_sw_rules;
status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
vsi_list_id, false, opcode,
new_fltr->lkup_type);
- /* update VSI list mapping info with new VSI id */
+ /* update VSI list mapping info with new VSI ID */
if (!status)
set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
}
* @hw: pointer to the hardware structure
* @recp_id: lookup type for which VSI lists needs to be searched
* @vsi_handle: VSI handle to be found in VSI list
- * @vsi_list_id: VSI list id found containing vsi_handle
+ * @vsi_list_id: VSI list ID found containing vsi_handle
*
* Helper function to search a VSI list with single entry containing given VSI
* handle element. This can be extended further to search VSI list with more
/**
* ice_add_rule_internal - add rule for a given lookup type
* @hw: pointer to the hardware structure
- * @recp_id: lookup type (recipe id) for which rule has to be added
+ * @recp_id: lookup type (recipe ID) for which rule has to be added
* @f_entry: structure containing MAC forwarding information
*
* Adds or updates the rule lists for a given recipe
/**
* ice_remove_vsi_list_rule
* @hw: pointer to the hardware structure
- * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
* @lkup_type: switch rule filter lookup type
*
* The VSI list should be emptied before this function is called to remove the
/**
* ice_remove_rule_internal - Remove a filter rule of a given type
* @hw: pointer to the hardware structure
- * @recp_id: recipe id for which the rule needs to removed
+ * @recp_id: recipe ID for which the rule needs to be removed
* @f_entry: rule entry containing filter information
*/
static enum ice_status
status = ice_rem_update_vsi_list(hw, vsi_handle, list_elem);
if (status)
goto exit;
- /* if vsi count goes to zero after updating the vsi list */
+ /* if VSI count goes to zero after updating the VSI list */
if (list_elem->vsi_count == 0)
remove_rule = true;
}
return ICE_ERR_PARAM;
hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
m_list_itr->fltr_info.fwd_id.hw_vsi_id = hw_vsi_id;
- /* update the src in case it is vsi num */
+ /* update the src in case it is VSI num */
if (m_list_itr->fltr_info.src_id != ICE_SRC_ID_VSI)
return ICE_ERR_PARAM;
m_list_itr->fltr_info.src = hw_vsi_id;
((u8 *)r_iter + (elem_sent * s_rule_size));
}
- /* Fill up rule id based on the value returned from FW */
+ /* Fill up rule ID based on the value returned from FW */
r_iter = s_rule;
list_for_each_entry(m_list_itr, m_list, list_entry) {
struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
new_fltr = &f_entry->fltr_info;
- /* VLAN id should only be 12 bits */
+ /* VLAN ID should only be 12 bits */
if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
return ICE_ERR_PARAM;
}
}
} else if (v_list_itr->vsi_list_info->ref_cnt == 1) {
+ /* Update existing VSI list to add new VSI ID only if it is used
+ /* Update existing VSI list to add new VSI ID only if it used
* by one VLAN rule.
*/
cur_fltr = &v_list_itr->fltr_info;
/* If VLAN rule exists and VSI list being used by this rule is
* referenced by more than 1 VLAN rule. Then create a new VSI
* list appending previous VSI with new VSI and update existing
- * VLAN rule to point to new VSI list id
+ * VLAN rule to point to new VSI list ID
*/
struct ice_fltr_info tmp_fltr;
u16 vsi_handle_arr[2];
struct ice_fltr_mgmt_list_entry *fm_entry;
enum ice_status status = 0;
- /* check to make sure VSI id is valid and within boundary */
+ /* check to make sure VSI ID is valid and within boundary */
if (!ice_is_vsi_valid(hw, vsi_handle))
return ICE_ERR_PARAM;
/**
* ice_remove_promisc - Remove promisc based filter rules
* @hw: pointer to the hardware structure
- * @recp_id: recipe id for which the rule needs to removed
+ * @recp_id: recipe ID for which the rule needs to be removed
* @v_list: list of promisc entries
*/
static enum ice_status
* ice_replay_vsi_fltr - Replay filters for requested VSI
* @hw: pointer to the hardware structure
* @vsi_handle: driver VSI handle
- * @recp_id: Recipe id for which rules need to be replayed
+ * @recp_id: Recipe ID for which rules need to be replayed
* @list_head: list for which filters need to be replayed
*
* Replays the filter of recipe recp_id for a VSI represented via vsi_handle.
f_entry.fltr_info = itr->fltr_info;
if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN &&
itr->fltr_info.vsi_handle == vsi_handle) {
- /* update the src in case it is vsi num */
+ /* update the src in case it is VSI num */
if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
f_entry.fltr_info.src = hw_vsi_id;
status = ice_add_rule_internal(hw, recp_id, &f_entry);
clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
f_entry.fltr_info.vsi_handle = vsi_handle;
f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
- /* update the src in case it is vsi num */
+ /* update the src in case it is VSI num */
if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
f_entry.fltr_info.src = hw_vsi_id;
if (recp_id == ICE_SW_LKUP_VLAN)
/**
* ice_rm_all_sw_replay_rule_info - deletes filter replay rules
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
*
* Deletes the filter replay rules.
*/
ICE_SW_LKUP_LAST
};
-/* type of filter src id */
+/* type of filter src ID */
enum ice_src_id {
ICE_SRC_ID_UNKNOWN = 0,
ICE_SRC_ID_VSI,
/* Depending on filter action */
union {
- /* queue id in case of ICE_FWD_TO_Q and starting
- * queue id in case of ICE_FWD_TO_QGRP.
+ /* queue ID in case of ICE_FWD_TO_Q and starting
+ * queue ID in case of ICE_FWD_TO_QGRP.
*/
u16 q_id:11;
u16 hw_vsi_id:10;
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
};
-/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */
struct ice_vsi_list_map_info {
struct list_head list_entry;
DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
* used for VLAN membership.
*/
struct ice_fltr_mgmt_list_entry {
- /* back pointer to VSI list id to VSI list mapping */
+ /* back pointer to VSI list ID to VSI list mapping */
struct ice_vsi_list_map_info *vsi_list_info;
u16 vsi_count;
#define ICE_INVAL_LG_ACT_INDEX 0xffff
#include <linux/prefetch.h>
#include <linux/mm.h>
#include "ice.h"
+#include "ice_dcb_lib.h"
#define ICE_RX_HDR_SIZE 256
if (!rx_ring->netdev || !cleaned_count)
return false;
- /* get the RX descriptor and buffer based on next_to_use */
+ /* get the Rx descriptor and buffer based on next_to_use */
rx_desc = ICE_RX_DESC(rx_ring, ntu);
bi = &rx_ring->rx_buf[ntu];
* ice_receive_skb - Send a completed packet up the stack
* @rx_ring: Rx ring in play
* @skb: packet to send up
- * @vlan_tag: vlan tag for packet
+ * @vlan_tag: VLAN tag for packet
*
* This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without vlan tag)
+ * gro receive functions (with/without VLAN tag)
*/
static void
ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
bool failure = false;
- /* start the loop to process RX packets bounded by 'budget' */
+ /* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
struct ice_rx_buf *rx_buf;
cleaned_count = 0;
}
- /* get the RX desc from RX ring based on 'next_to_clean' */
+ /* get the Rx desc from Rx ring based on 'next_to_clean' */
rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
/* status_error_len will always be zero for unused descriptors
return failure ? budget : (int)total_rx_pkts;
}
-static unsigned int ice_itr_divisor(struct ice_port_info *pi)
+/**
+ * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic
+ * @port_info: port_info structure containing the current link speed
+ * @avg_pkt_size: average size of Tx or Rx packets based on clean routine
+ * @itr: ITR value to update
+ *
+ * Calculate how big of an increment should be applied to the ITR value passed
+ * in based on wmem_default, SKB overhead, Ethernet overhead, and the current
+ * link speed.
+ *
+ * The following is a calculation derived from:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + Ethernet overhead) = pkt_rate
+ * (desired_pkts_per_int / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to:
+ *
+ * wmem_default * bits_per_byte * usecs_per_sec pkt_size + 24
+ * ITR = -------------------------------------------- * --------------
+ * rate pkt_size + 640
+ */
+static unsigned int
+ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info,
+ unsigned int avg_pkt_size,
+ unsigned int itr)
{
- switch (pi->phy.link_info.link_speed) {
+ switch (port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_100GB:
+ itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24),
+ avg_pkt_size + 640);
+ break;
+ case ICE_AQ_LINK_SPEED_50GB:
+ itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24),
+ avg_pkt_size + 640);
+ break;
case ICE_AQ_LINK_SPEED_40GB:
- return ICE_ITR_ADAPTIVE_MIN_INC * 1024;
+ itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24),
+ avg_pkt_size + 640);
+ break;
case ICE_AQ_LINK_SPEED_25GB:
+ itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24),
+ avg_pkt_size + 640);
+ break;
case ICE_AQ_LINK_SPEED_20GB:
- return ICE_ITR_ADAPTIVE_MIN_INC * 512;
- case ICE_AQ_LINK_SPEED_100MB:
- return ICE_ITR_ADAPTIVE_MIN_INC * 32;
+ itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24),
+ avg_pkt_size + 640);
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ /* fall through */
default:
- return ICE_ITR_ADAPTIVE_MIN_INC * 256;
+ itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24),
+ avg_pkt_size + 640);
+ break;
}
+
+ if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= ICE_ITR_ADAPTIVE_LATENCY;
+ itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+ }
+
+ return itr;
}
/**
static void
ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
{
- unsigned int avg_wire_size, packets, bytes, itr;
unsigned long next_update = jiffies;
+ unsigned int packets, bytes, itr;
bool container_is_rx;
if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
if (packets && packets < 4 && bytes < 9000 &&
(q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
itr = ICE_ITR_ADAPTIVE_LATENCY;
- goto adjust_by_size;
+ goto adjust_by_size_and_speed;
}
} else if (packets < 4) {
/* If we have Tx and Rx ITR maxed and Tx ITR is running in
*/
itr = ICE_ITR_ADAPTIVE_BULK;
-adjust_by_size:
- /* If packet counts are 256 or greater we can assume we have a gross
- * overestimation of what the rate should be. Instead of trying to fine
- * tune it just use the formula below to try and dial in an exact value
- * gives the current packet size of the frame.
- */
- avg_wire_size = bytes / packets;
+adjust_by_size_and_speed:
- /* The following is a crude approximation of:
- * wmem_default / (size + overhead) = desired_pkts_per_int
- * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
- * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
- *
- * Assuming wmem_default is 212992 and overhead is 640 bytes per
- * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
- * formula down to
- *
- * (170 * (size + 24)) / (size + 640) = ITR
- *
- * We first do some math on the packet size and then finally bitshift
- * by 8 after rounding up. We also have to account for PCIe link speed
- * difference as ITR scales based on this.
- */
- if (avg_wire_size <= 60) {
- /* Start at 250k ints/sec */
- avg_wire_size = 4096;
- } else if (avg_wire_size <= 380) {
- /* 250K ints/sec to 60K ints/sec */
- avg_wire_size *= 40;
- avg_wire_size += 1696;
- } else if (avg_wire_size <= 1084) {
- /* 60K ints/sec to 36K ints/sec */
- avg_wire_size *= 15;
- avg_wire_size += 11452;
- } else if (avg_wire_size <= 1980) {
- /* 36K ints/sec to 30K ints/sec */
- avg_wire_size *= 5;
- avg_wire_size += 22420;
- } else {
- /* plateau at a limit of 30K ints/sec */
- avg_wire_size = 32256;
- }
-
- /* If we are in low latency mode halve our delay which doubles the
- * rate to somewhere between 100K to 16K ints/sec
- */
- if (itr & ICE_ITR_ADAPTIVE_LATENCY)
- avg_wire_size >>= 1;
-
- /* Resultant value is 256 times larger than it needs to be. This
- * gives us room to adjust the value as needed to either increase
- * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
- *
- * Use addition as we have already recorded the new latency flag
- * for the ITR value.
- */
- itr += DIV_ROUND_UP(avg_wire_size,
- ice_itr_divisor(q_vector->vsi->port_info)) *
- ICE_ITR_ADAPTIVE_MIN_INC;
-
- if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
- itr &= ICE_ITR_ADAPTIVE_LATENCY;
- itr += ICE_ITR_ADAPTIVE_MAX_USECS;
- }
+ /* based on checks above packets cannot be 0 so division is safe */
+ itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info,
+ bytes / packets, itr);
clear_counts:
/* write back value */
}
/**
- * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
* @tx_ring: ring to send buffer on
* @first: pointer to struct ice_tx_buf
*
* to the encapsulated ethertype.
*/
skb->protocol = vlan_get_protocol(skb);
- goto out;
+ return 0;
}
/* if we have a HW VLAN tag being added, default to the HW one */
first->tx_flags |= ICE_TX_FLAGS_SW_VLAN;
}
-out:
- return 0;
+ return ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
}
/**
#define ICE_TX_FLAGS_HW_VLAN BIT(1)
#define ICE_TX_FLAGS_SW_VLAN BIT(2)
#define ICE_TX_FLAGS_VLAN_M 0xffff0000
+#define ICE_TX_FLAGS_VLAN_PR_M 0xe0000000
+#define ICE_TX_FLAGS_VLAN_PR_S 29
#define ICE_TX_FLAGS_VLAN_S 16
#define ICE_RX_DMA_ATTR \
};
u16 q_index; /* Queue number of ring */
u32 txq_teid; /* Added Tx queue TEID */
+#ifdef CONFIG_DCB
+ u8 dcb_tc; /* Traffic class of ring */
+#endif /* CONFIG_DCB */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
};
/* Different reset sources for which a disable queue AQ call has to be made in
- * order to clean the TX scheduler as a part of the reset
+ * order to clean the Tx scheduler as a part of the reset
*/
enum ice_disq_rst_src {
ICE_NO_RESET = 0,
struct ice_hw_common_caps {
u32 valid_functions;
- /* TX/RX queues */
- u16 num_rxq; /* Number/Total RX queues */
- u16 rxq_first_id; /* First queue ID for RX queues */
- u16 num_txq; /* Number/Total TX queues */
- u16 txq_first_id; /* First queue ID for TX queues */
+ /* Tx/Rx queues */
+ u16 num_rxq; /* Number/Total Rx queues */
+ u16 rxq_first_id; /* First queue ID for Rx queues */
+ u16 num_txq; /* Number/Total Tx queues */
+ u16 txq_first_id; /* First queue ID for Tx queues */
/* MSI-X vectors */
u16 num_msix_vectors;
/* RSS related capabilities */
u16 rss_table_size; /* 512 for PFs and 64 for VFs */
u8 rss_table_entry_width; /* RSS Entry width in bits */
+
+ u8 dcb;
};
/* Function specific capabilities */
#define ice_for_each_traffic_class(_i) \
for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+#define ICE_INVAL_TEID 0xFFFFFFFF
+
struct ice_sched_node {
struct ice_sched_node *parent;
struct ice_sched_node *sibling; /* next sibling in the same layer */
struct ice_sched_node **children;
struct ice_aqc_txsched_elem_data info;
- u32 agg_id; /* aggregator group id */
+ u32 agg_id; /* aggregator group ID */
u16 vsi_handle;
u8 in_use; /* suspended or in use */
u8 tx_sched_layer; /* Logical Layer (1-9) */
#define ICE_SCHED_DFLT_RL_PROF_ID 0
#define ICE_SCHED_DFLT_BW_WT 1
-/* vsi type list entry to locate corresponding vsi/ag nodes */
+/* VSI type list entry to locate corresponding VSI/ag nodes */
struct ice_sched_vsi_info {
struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
u8 rdma_ena;
};
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct ice_dcb_ets_cfg {
+ u8 willing;
+ u8 cbs;
+ u8 maxtcs;
+ u8 prio_table[ICE_MAX_TRAFFIC_CLASS];
+ u8 tcbwtable[ICE_MAX_TRAFFIC_CLASS];
+ u8 tsatable[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct ice_dcb_pfc_cfg {
+ u8 willing;
+ u8 mbc;
+ u8 pfccap;
+ u8 pfcena;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct ice_dcb_app_priority_table {
+ u16 prot_id;
+ u8 priority;
+ u8 selector;
+};
+
+#define ICE_MAX_USER_PRIORITY 8
+#define ICE_DCBX_MAX_APPS 32
+#define ICE_LLDPDU_SIZE 1500
+#define ICE_TLV_STATUS_OPER 0x1
+#define ICE_TLV_STATUS_SYNC 0x2
+#define ICE_TLV_STATUS_ERR 0x4
+#define ICE_APP_PROT_ID_FCOE 0x8906
+#define ICE_APP_PROT_ID_ISCSI 0x0cbc
+#define ICE_APP_PROT_ID_FIP 0x8914
+#define ICE_APP_SEL_ETHTYPE 0x1
+#define ICE_APP_SEL_TCPIP 0x2
+#define ICE_CEE_APP_SEL_ETHTYPE 0x0
+#define ICE_CEE_APP_SEL_TCPIP 0x1
+
+/* One DCBX configuration set: two ETS blocks, one PFC block and an
+ * application priority table of up to ICE_DCBX_MAX_APPS entries.
+ * NOTE(review): etscfg/etsrec presumably map to the 802.1Qaz ETS
+ * Configuration and ETS Recommendation TLVs, and numapps is presumably the
+ * number of valid entries in app[] - confirm against the DCB parsing code.
+ */
+struct ice_dcbx_cfg {
+ u32 numapps;
+ u32 tlv_status; /* CEE mode TLV status */
+ struct ice_dcb_ets_cfg etscfg;
+ struct ice_dcb_ets_cfg etsrec;
+ struct ice_dcb_pfc_cfg pfc;
+ struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS];
+ /* dcbx_mode takes one of the ICE_DCBX_MODE_* values defined below */
+ u8 dcbx_mode;
+#define ICE_DCBX_MODE_CEE 0x1
+#define ICE_DCBX_MODE_IEEE 0x2
+ /* app_mode takes the ICE_DCBX_APPS_* value defined below */
+ u8 app_mode;
+#define ICE_DCBX_APPS_NON_WILLING 0x1
+};
+
struct ice_port_info {
struct ice_sched_node *root; /* Root Node per Port */
- struct ice_hw *hw; /* back pointer to hw instance */
+ struct ice_hw *hw; /* back pointer to HW instance */
u32 last_node_teid; /* scheduler last node info */
u16 sw_id; /* Initial switch ID belongs to port */
u16 pf_vf_num;
struct ice_mac_info mac;
struct ice_phy_info phy;
struct mutex sched_lock; /* protect access to TXSched tree */
+ struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
+ /* DCBX info */
+ struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
+ struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */
+ /* LLDP/DCBX Status */
+ u8 dcbx_status;
+ u8 is_sw_lldp;
u8 lport;
#define ICE_LPORT_MASK 0xff
u8 is_vf;
u8 pf_id; /* device profile info */
- /* TX Scheduler values */
+ /* Tx Scheduler values */
u16 num_tx_sched_layers;
u16 num_tx_sched_phys_layers;
u8 flattened_layers;
struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI];
u8 evb_veb; /* true for VEB, false for VEPA */
- u8 reset_ongoing; /* true if hw is in reset, false otherwise */
+ u8 reset_ongoing; /* true if HW is in reset, false otherwise */
struct ice_bus_info bus;
struct ice_nvm_info nvm;
struct ice_hw_dev_caps dev_caps; /* device capabilities */
u64 link_xoff_rx; /* lxoffrxc */
u64 link_xon_tx; /* lxontxc */
u64 link_xoff_tx; /* lxofftxc */
+ u64 priority_xon_rx[8]; /* pxonrxc[8] */
+ u64 priority_xoff_rx[8]; /* pxoffrxc[8] */
+ u64 priority_xon_tx[8]; /* pxontxc[8] */
+ u64 priority_xoff_tx[8]; /* pxofftxc[8] */
+ u64 priority_xon_2_xoff[8]; /* pxon2offc[8] */
u64 rx_size_64; /* prc64 */
u64 rx_size_127; /* prc127 */
u64 rx_size_255; /* prc255 */
}
/**
- * ice_vsi_set_pvid_fill_ctxt - Set VSI ctxt for add pvid
- * @ctxt: the vsi ctxt to fill
- * @vid: the VLAN id to set as a PVID
+ * ice_vsi_set_pvid_fill_ctxt - Set VSI ctxt for add PVID
+ * @ctxt: the VSI ctxt to fill
+ * @vid: the VLAN ID to set as a PVID
*/
static void ice_vsi_set_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt, u16 vid)
{
}
/**
- * ice_vsi_kill_pvid_fill_ctxt - Set VSI ctx for remove pvid
+ * ice_vsi_kill_pvid_fill_ctxt - Set VSI ctx for remove PVID
* @ctxt: the VSI ctxt to fill
*/
static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt)
/**
* ice_vsi_manage_pvid - Enable or disable port VLAN for VSI
* @vsi: the VSI to update
- * @vid: the VLAN id to set as a PVID
- * @enable: true for enable pvid false for disable
+ * @vid: the VLAN ID to set as a PVID
+ * @enable: true for enable PVID false for disable
*/
static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
{
* ice_vf_vsi_setup - Set up a VF VSI
* @pf: board private structure
* @pi: pointer to the port_info instance
- * @vf_id: defines VF id to which this VSI connects.
+ * @vf_id: defines VF ID to which this VSI connects.
*
* Returns pointer to the successfully allocated VSI struct on success,
* otherwise returns NULL on failure.
/* Clear this bit after VF initialization since we shouldn't reclaim
* and reassign interrupts for synchronous or asynchronous VFR events.
- * We dont want to reconfigure interrupts since AVF driver doesn't
+ * We don't want to reconfigure interrupts since AVF driver doesn't
* expect vector assignment to be changed unless there is a request for
* more vectors.
*/
/**
* ice_find_vsi_from_id
* @pf: the pf structure to search for the VSI
- * @id: id of the VSI it is searching for
+ * @id: ID of the VSI it is searching for
*
- * searches for the VSI with the given id
+ * searches for the VSI with the given ID
*/
static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id)
{
/**
* ice_vc_isvalid_vsi_id
* @vf: pointer to the VF info
- * @vsi_id: VF relative VSI id
+ * @vsi_id: VF relative VSI ID
*
- * check for the valid VSI id
+ * check for the valid VSI ID
*/
static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
{
/**
* ice_vc_isvalid_q_id
* @vf: pointer to the VF info
- * @vsi_id: VSI id
- * @qid: VSI relative queue id
+ * @vsi_id: VSI ID
+ * @qid: VSI relative queue ID
*
- * check for the valid queue id
+ * check for the valid queue ID
*/
static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
{
* ice_vc_handle_mac_addr_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
- * @set: true if mac filters are being set, false otherwise
+ * @set: true if MAC filters are being set, false otherwise
*
* add guest MAC address filter
*/
maddr, vf->vf_id);
continue;
} else {
- /* VF can't remove dflt_lan_addr/bcast mac */
+ /* VF can't remove dflt_lan_addr/bcast MAC */
dev_err(&pf->pdev->dev,
"VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n",
maddr, vf->vf_id);
goto handle_mac_exit;
}
- /* get here if maddr is multicast or if VF can change mac */
+ /* get here if maddr is multicast or if VF can change MAC */
if (ice_add_mac_to_list(vsi, &mac_list, al->list[i].addr)) {
v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
goto handle_mac_exit;
* VFs get a default number of queues but can use this message to request a
* different number. If the request is successful, PF will reset the VF and
* return 0. If unsuccessful, PF will send message informing VF of number of
- * available queue pairs via virtchnl message response to vf.
+ * available queue pairs via virtchnl message response to VF.
*/
static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
{
* ice_set_vf_port_vlan
* @netdev: network interface device structure
* @vf_id: VF identifier
- * @vlan_id: VLAN id being set
+ * @vlan_id: VLAN ID being set
* @qos: priority setting
* @vlan_proto: VLAN protocol
*
- * program VF Port VLAN id and/or qos
+ * program VF Port VLAN ID and/or QoS
*/
int
ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
return ret;
}
- /* If pvid, then remove all filters on the old VLAN */
+ /* If PVID, then remove all filters on the old VLAN */
if (vsi->info.pvid)
ice_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) &
VLAN_VID_MASK));
* @msg: pointer to the msg buffer
* @add_v: Add VLAN if true, otherwise delete VLAN
*
- * Process virtchnl op to add or remove programmed guest VLAN id
+ * Process virtchnl op to add or remove programmed guest VLAN ID
*/
static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
{
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
*
- * Add and program guest VLAN id
+ * Add and program guest VLAN ID
*/
static int ice_vc_add_vlan_msg(struct ice_vf *vf, u8 *msg)
{
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
*
- * remove programmed guest VLAN id
+ * remove programmed guest VLAN ID
*/
static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
{
* ice_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
- * @mac: mac address
+ * @mac: MAC address
*
- * program VF mac address
+ * program VF MAC address
*/
int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
{
return -EINVAL;
}
- /* copy mac into dflt_lan_addr and trigger a VF reset. The reset
+ /* copy MAC into dflt_lan_addr and trigger a VF reset. The reset
* flow will use the updated dflt_lan_addr and add a MAC filter
* using ice_add_mac. Also set pf_set_mac to indicate that the PF has
* set the MAC address for this VF.
struct ice_vf {
struct ice_pf *pf;
- s16 vf_id; /* VF id in the PF space */
+ s16 vf_id; /* VF ID in the PF space */
u32 driver_caps; /* reported by VF driver */
int first_vector_idx; /* first vector index of this VF */
- struct ice_sw *vf_sw_id; /* switch id the VF VSIs connect to */
+ struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */
struct virtchnl_version_info vf_ver;
struct virtchnl_ether_addr dflt_lan_addr;
u16 port_vlan_id;
u8 trusted;
u16 lan_vsi_idx; /* index into PF struct */
u16 lan_vsi_num; /* ID as used by firmware */
- u64 num_mdd_events; /* number of mdd events detected */
+ u64 num_mdd_events; /* number of MDD events detected */
u64 num_inval_msgs; /* number of continuous invalid msgs */
u64 num_valid_msgs; /* number of valid msgs detected */
- unsigned long vf_caps; /* vf's adv. capabilities */
+ unsigned long vf_caps; /* VF's adv. capabilities */
DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
unsigned int tx_rate; /* Tx bandwidth limit in Mbps */
u8 link_forced;
int err;
int cpu;
- dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number);
+ dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(struct mvneta_port),
+ txq_number, rxq_number);
if (!dev)
return -ENOMEM;
dev->irq = irq_of_parse_and_map(dn, 0);
- if (dev->irq == 0) {
- err = -EINVAL;
- goto err_free_netdev;
- }
+ if (dev->irq == 0)
+ return -EINVAL;
phy_mode = of_get_phy_mode(dn);
if (phy_mode < 0) {
phylink_destroy(pp->phylink);
err_free_irq:
irq_dispose_mapping(dev->irq);
-err_free_netdev:
- free_netdev(dev);
return err;
}
free_percpu(pp->stats);
irq_dispose_mapping(dev->irq);
phylink_destroy(pp->phylink);
- free_netdev(dev);
if (pp->bm_priv) {
mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
- en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
+ en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
+ en/params.o
#
# Netdev extra
struct mlx5_cmd *cmd = &dev->cmd;
snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
- dev_name(&dev->pdev->dev));
+ dev->priv.name);
}
static void clean_debug_files(struct mlx5_core_dev *dev)
memset(cmd, 0, sizeof(*cmd));
cmd_if_rev = cmdif_rev(dev);
if (cmd_if_rev != CMD_IF_REV) {
- dev_err(&dev->pdev->dev,
- "Driver cmdif rev(%d) differs from firmware's(%d)\n",
- CMD_IF_REV, cmd_if_rev);
+ mlx5_core_err(dev,
+ "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+ CMD_IF_REV, cmd_if_rev);
return -EINVAL;
}
cmd->log_sz = cmd_l >> 4 & 0xf;
cmd->log_stride = cmd_l & 0xf;
if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
- dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
- 1 << cmd->log_sz);
+ mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n",
+ 1 << cmd->log_sz);
err = -EINVAL;
goto err_free_page;
}
if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
- dev_err(&dev->pdev->dev, "command queue size overflow\n");
+ mlx5_core_err(dev, "command queue size overflow\n");
err = -EINVAL;
goto err_free_page;
}
cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
if (cmd->cmdif_rev > CMD_IF_REV) {
- dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
- CMD_IF_REV, cmd->cmdif_rev);
+ mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n",
+ CMD_IF_REV, cmd->cmdif_rev);
err = -EOPNOTSUPP;
goto err_free_page;
}
cmd_h = (u32)((u64)(cmd->dma) >> 32);
cmd_l = (u32)(cmd->dma);
if (cmd_l & 0xfff) {
- dev_err(&dev->pdev->dev, "invalid command queue address\n");
+ mlx5_core_err(dev, "invalid command queue address\n");
err = -ENOMEM;
goto err_free_page;
}
set_wqname(dev);
cmd->wq = create_singlethread_workqueue(cmd->wq_name);
if (!cmd->wq) {
- dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+ mlx5_core_err(dev, "failed to create command workqueue\n");
err = -ENOMEM;
goto err_cache;
}
TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
TP_STRUCT__entry(
- __string(dev_name, dev_name(&tracer->dev->pdev->dev))
+ __string(dev_name, tracer->dev->priv.name)
__field(u64, trace_timestamp)
__field(bool, lost)
__field(u8, event_id)
),
TP_fast_assign(
- __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev));
+ __assign_str(dev_name, tracer->dev->priv.name);
__entry->trace_timestamp = trace_timestamp;
__entry->lost = lost;
__entry->event_id = event_id;
struct net_dim_cq_moder rx_cq_moderation;
struct net_dim_cq_moder tx_cq_moderation;
bool lro_en;
- u32 lro_wqe_sz;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
bool scatter_fcs_en;
struct mlx5e_xdp_wqe_info {
u8 num_wqebbs;
- u8 num_ds;
+ u8 num_pkts;
};
struct mlx5e_xdp_mpwqe {
/* Current MPWQE session */
struct mlx5e_tx_wqe *wqe;
u8 ds_count;
+ u8 pkt_count;
u8 max_ds_count;
+ u8 complete;
+ u8 inline_on;
};
struct mlx5e_xdpsq;
/* dirtied @completion */
u32 xdpi_fifo_cc;
u16 cc;
- bool redirect_flush;
/* dirtied @xmit */
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
struct mlx5e_icosq {
/* data path */
+ u16 cc;
+ u16 pc;
- /* dirtied @xmit */
- u16 pc ____cacheline_aligned_in_smp;
-
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
struct mlx5e_cq cq;
/* write@xmit, read@completion */
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
enum mlx5e_rq_flag {
- MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
+ MLX5E_RQ_FLAG_XDP_XMIT,
+ MLX5E_RQ_FLAG_XDP_REDIRECT,
};
struct mlx5e_rq_frag_info {
struct mlx5e_mpw_info *info;
mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
u16 num_strides;
+ u16 actual_wq_head;
u8 log_stride_sz;
- bool umr_in_progress;
+ u8 umr_in_progress;
+ u8 umr_last_bulk;
} mpwqe;
};
struct {
netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
void mlx5e_completion_event(struct mlx5_core_cq *mcq);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
* switching channels
*/
typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *new_chs,
mlx5e_fp_hw_modify hw_modify);
*/
wmb();
- mlx5_write64((__be32 *)ctrl, uar_map, NULL);
+ mlx5_write64((__be32 *)ctrl, uar_map);
}
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "en/params.h"
+
+/* Compute the linear RX fragment size for @params: the HW MTU plus the RQ
+ * headroom (XDP or regular, plus NET_IP_ALIGN), passed through
+ * MLX5_SKB_FRAG_SZ(). With an XDP program attached the result is never
+ * smaller than PAGE_SIZE.
+ */
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
+{
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 linear_rq_headroom = params->xdp_prog ?
+ XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+ u32 frag_sz;
+
+ linear_rq_headroom += NET_IP_ALIGN;
+
+ frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
+
+ /* XDP case: round small fragments up to a full page */
+ if (params->xdp_prog && frag_sz < PAGE_SIZE)
+ frag_sz = PAGE_SIZE;
+
+ return frag_sz;
+}
+
+/* Return MLX5_MPWRQ_LOG_WQE_SZ minus the log2 (rounded up by order_base_2())
+ * of the linear fragment size, i.e. the log2 count of linear fragments per
+ * multi-packet WQE.
+ */
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+{
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+
+ return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+}
+
+/* Return true when LRO is disabled and the linear fragment size computed for
+ * @params does not exceed PAGE_SIZE.
+ */
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params)
+{
+ u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+
+ return !params->lro_en && frag_sz <= PAGE_SIZE;
+}
+
+/* Largest usable log stride size: the maximum value that fits in the wq
+ * log_wqe_stride_size field plus MLX5_MPWQE_LOG_STRIDE_SZ_BASE.
+ */
+#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
+ MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
+/* Return true when the multi-packet (striding) RQ can use linear SKBs for
+ * @params: the generic linear-SKB condition must hold, the stride size must
+ * be representable, and the resulting number of strides must be supported.
+ */
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ s8 signed_log_num_strides_param;
+ u8 log_num_strides;
+
+ if (!mlx5e_rx_is_linear_skb(params))
+ return false;
+
+ /* One fragment per stride: the stride size must not exceed the max */
+ if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ return false;
+
+ /* With the ext_stride_num_range capability no further check is made */
+ if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+ return true;
+
+ /* Otherwise the stride count, expressed relative to
+ * MLX5_MPWQE_LOG_NUM_STRIDES_BASE, must not be negative.
+ */
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ signed_log_num_strides_param =
+ (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+
+ return signed_log_num_strides_param >= 0;
+}
+
+/* Return the log2 RQ size in multi-packet WQEs: log_rq_mtu_frames minus the
+ * log2 packets per WQE, clamped from below to
+ * MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW.
+ */
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+{
+ u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params);
+
+ /* Numbers are unsigned, don't subtract to avoid underflow. */
+ if (params->log_rq_mtu_frames <
+ log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+ return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+ return params->log_rq_mtu_frames - log_pkts_per_wqe;
+}
+
+/* Return the log2 stride size: the rounded-up log2 of the linear fragment
+ * size when linear SKBs are usable on the striding RQ, otherwise the default
+ * given by MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev).
+ */
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
+
+ return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+}
+
+/* Return the log2 number of strides per WQE: MLX5_MPWRQ_LOG_WQE_SZ minus the
+ * log2 stride size chosen by mlx5e_mpwqe_get_log_stride_size().
+ */
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return MLX5_MPWRQ_LOG_WQE_SZ -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params);
+}
+
+/* Return the RQ headroom for @params: the XDP or regular headroom plus
+ * NET_IP_ALIGN when the RQ uses linear SKBs, 0 otherwise.
+ */
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u16 linear_rq_headroom = params->xdp_prog ?
+ XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+ bool is_linear_skb;
+
+ linear_rq_headroom += NET_IP_ALIGN;
+
+ /* The linear-SKB test depends on the RQ type: cyclic vs. any other */
+ is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+ mlx5e_rx_is_linear_skb(params) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+
+ return is_linear_skb ? linear_rq_headroom : 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PARAMS_H__
+#define __MLX5_EN_PARAMS_H__
+
+#include "en.h"
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params);
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params);
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params);
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params);
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+
+#endif /* __MLX5_EN_PARAMS_H__ */
static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
{
- int err;
+ int err = 0;
rtnl_lock();
mutex_lock(&priv->state_lock);
- mlx5e_close_locked(priv->netdev);
- err = mlx5e_open_locked(priv->netdev);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto out;
+
+ err = mlx5e_safe_reopen_channels(priv);
+
+out:
mutex_unlock(&priv->state_lock);
rtnl_unlock();
return -EOPNOTSUPP;
}
+ if (!(mlx5e_eswitch_rep(*out_dev) &&
+ mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
+ return -EOPNOTSUPP;
+
return 0;
}
if (ret)
return ret;
- if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway)
+ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET)
return -ENETUNREACH;
#else
return -EOPNOTSUPP;
if (dev->rtnl_link_ops)
return dev->rtnl_link_ops->kind;
else
- return "";
+ return "unknown";
}
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
headers_c, headers_v);
} else {
netdev_warn(priv->netdev,
- "decapsulation offload is not supported for %s net device (%d)\n",
- mlx5e_netdev_kind(filter_dev), tunnel_type);
+ "decapsulation offload is not supported for %s (kind: \"%s\")\n",
+ netdev_name(filter_dev),
+ mlx5e_netdev_kind(filter_dev));
+
return -EOPNOTSUPP;
}
return err;
#include <linux/bpf_trace.h>
#include "en/xdp.h"
+/* Return the maximum SW MTU that can be used with XDP for @params: the SW
+ * equivalent of SKB_MAX_HEAD() of the XDP headroom (including NET_IP_ALIGN).
+ * The derivation is given in the comment inside the function.
+ */
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+ int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
struct xdp_buff *xdp)
if (unlikely(err))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
- rq->xdpsq.redirect_flush = true;
+ __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
u8 wqebbs;
u16 pi;
mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
prefetchw(session->wqe->data);
- session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+ session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+ session->pkt_count = 0;
+ session->complete = 0;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
MLX5E_XDP_MPW_MAX_WQEBBS);
session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
+
+ mlx5e_xdp_update_inline_state(sq);
+
+ stats->mpwqe++;
}
static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
- wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
+ wi->num_pkts = session->pkt_count;
sq->pc += wi->num_wqebbs;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- dma_addr_t dma_addr = xdpi->dma_addr;
struct xdp_frame *xdpf = xdpi->xdpf;
- unsigned int dma_len = xdpf->len;
- if (unlikely(sq->hw_mtu < dma_len)) {
+ if (unlikely(sq->hw_mtu < xdpf->len)) {
stats->err++;
return false;
}
mlx5e_xdp_mpwqe_session_start(sq);
}
- mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
+ mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats);
- if (unlikely(session->ds_count == session->max_ds_count))
+ if (unlikely(session->complete ||
+ session->ds_count == session->max_ds_count))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
sqcc += wi->num_wqebbs;
- for (j = 0; j < wi->num_ds; j++) {
+ for (j = 0; j < wi->num_pkts; j++) {
struct mlx5e_xdp_info xdpi =
mlx5e_xdpi_fifo_pop(xdpi_fifo);
if (is_redirect) {
- xdp_return_frame(xdpi.xdpf);
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpi.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.xdpf);
} else {
/* Recycle RX page */
mlx5e_page_release(rq, &xdpi.di, true);
sq->cc += wi->num_wqebbs;
- for (i = 0; i < wi->num_ds; i++) {
+ for (i = 0; i < wi->num_pkts; i++) {
struct mlx5e_xdp_info xdpi =
mlx5e_xdpi_fifo_pop(xdpi_fifo);
if (is_redirect) {
- xdp_return_frame(xdpi.xdpf);
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpi.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.xdpf);
} else {
/* Recycle RX page */
mlx5e_page_release(rq, &xdpi.di, false);
mlx5e_xmit_xdp_doorbell(xdpsq);
- if (xdpsq->redirect_flush) {
+ if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
xdp_do_flush_map();
- xdpsq->redirect_flush = false;
+ __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
}
}
#include "en.h"
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
- MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
}
}
+/* Enable inline WQEs to shift some load from a congested HCA (HW) to
+ * a less congested CPU (SW).
+ *
+ * The decision uses two watermarks on the number of outstanding xdpi FIFO
+ * entries (producer minus consumer counter): inlining is switched on at
+ * MLX5E_XDP_INLINE_WATERMARK_HIGH or more and switched off again only at
+ * MLX5E_XDP_INLINE_WATERMARK_LOW or fewer; in between the state is kept.
+ */
+static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
+{
+ u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+
+#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
+#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
+
+ /* inline is on: only the low watermark can turn it off */
+ if (session->inline_on) {
+ if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+ session->inline_on = 0;
+ return;
+ }
+
+ /* inline is false */
+ if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+ session->inline_on = 1;
+}
+
+/* Append the frame described by @xdpi to the current MPWQE session of @sq.
+ * The frame is copied inline into the WQE when inlining is on, the frame is
+ * short enough and the WQE has room; otherwise a pointer data segment
+ * (address, length, lkey) is written. session->pkt_count is incremented in
+ * both cases and @stats->inlnw counts the inlined frames.
+ */
static inline void
-mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
+ struct mlx5e_xdpsq_stats *stats)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ dma_addr_t dma_addr = xdpi->dma_addr;
+ struct xdp_frame *xdpf = xdpi->xdpf;
struct mlx5_wqe_data_seg *dseg =
- (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+ u16 dma_len = xdpf->len;
+ session->pkt_count++;
+
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
+
+ if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
+ struct mlx5_wqe_inline_seg *inline_dseg =
+ (struct mlx5_wqe_inline_seg *)dseg;
+ /* Inline header plus payload, rounded up to whole data segments */
+ u16 ds_len = sizeof(*inline_dseg) + dma_len;
+ u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS);
+
+ if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) {
+ /* Not enough space for inline wqe, send with memory pointer */
+ session->complete = true;
+ goto no_inline;
+ }
+
+ inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
+ memcpy(inline_dseg->data, xdpf->data, dma_len);
+
+ session->ds_count += ds_cnt;
+ stats->inlnw++;
+ return;
+ }
+
+no_inline:
dseg->addr = cpu_to_be64(dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
+ /* A pointer segment always takes exactly one data segment */
+ session->ds_count++;
}
static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
{
return fifo->xi[(*fifo->cc)++ & fifo->mask];
}
-
#endif
break;
case MLX5_MODULE_ID_SFP:
modinfo->type = ETH_MODULE_SFF_8472;
- modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
break;
default:
netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
struct mlx5e_channel *c;
int i;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+ priv->channels.params.xdp_prog)
return 0;
for (i = 0; i < channels->num; i++) {
#include "lib/eq.h"
#include "en/monitor_stats.h"
#include "en/reporter.h"
+#include "en/params.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
return true;
}
-static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
-{
- u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- u32 frag_sz;
-
- linear_rq_headroom += NET_IP_ALIGN;
-
- frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
-
- if (params->xdp_prog && frag_sz < PAGE_SIZE)
- frag_sz = PAGE_SIZE;
-
- return frag_sz;
-}
-
-static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
-{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
-
- return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
-}
-
-static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
-
- return !params->lro_en && frag_sz <= PAGE_SIZE;
-}
-
-#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
- MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
-static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
- s8 signed_log_num_strides_param;
- u8 log_num_strides;
-
- if (!mlx5e_rx_is_linear_skb(mdev, params))
- return false;
-
- if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
- return false;
-
- if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
- return true;
-
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
- signed_log_num_strides_param =
- (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
-
- return signed_log_num_strides_param >= 0;
-}
-
-static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
-{
- if (params->log_rq_mtu_frames <
- mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
- return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
-
- return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params);
-}
-
-static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
-
- return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
-}
-
-static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- return MLX5_MPWRQ_LOG_WQE_SZ -
- mlx5e_mpwqe_get_log_stride_size(mdev, params);
-}
-
-static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- bool is_linear_skb;
-
- linear_rq_headroom += NET_IP_ALIGN;
-
- is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(mdev, params) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
-
- return is_linear_skb ? linear_rq_headroom : 0;
-}
-
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
- params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
params->log_rq_mtu_frames = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
}
static int mlx5e_init_di_list(struct mlx5e_rq *rq,
- struct mlx5e_params *params,
int wq_sz, int cpu)
{
int len = wq_sz << rq->wqe.info.log_num_frags;
goto err_free;
}
- err = mlx5e_init_di_list(rq, params, wq_sz, c->cpu);
+ err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
if (err)
goto err_free;
rq->post_wqes = mlx5e_post_rx_wqes;
goto err_free;
}
- rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(mdev, params) ?
+ rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ?
mlx5e_skb_from_cqe_linear :
mlx5e_skb_from_cqe_nonlinear;
rq->mkey_be = c->mkey_be;
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ u16 head = wq->head;
+ int i;
- /* UMR WQE (if in progress) is always at wq->head */
- if (rq->mpwqe.umr_in_progress)
- rq->dealloc_wqe(rq, wq->head);
+ /* Outstanding UMR WQEs (in progress) start at wq->head */
+ for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+ rq->dealloc_wqe(rq, head);
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ }
while (!mlx5_wq_ll_is_empty(wq)) {
struct mlx5e_rx_wqe_ll *wqe;
if (params->rx_dim_enabled)
__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
- if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE))
+ /* We disable csum_complete when XDP is enabled since
+ * XDP programs might manipulate packets which will render
+ * skb->checksum incorrect.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
return 0;
+/* Mark @rq as enabled, then call mlx5e_trigger_irq() on the ICOSQ of the
+ * channel that owns it.
+ * NOTE(review): presumably the triggered interrupt is what gets the RQ
+ * filled for the first time - confirm against mlx5e_trigger_irq().
+ */
static void mlx5e_activate_rq(struct mlx5e_rq *rq)
{
- struct mlx5e_icosq *sq = &rq->channel->icosq;
- struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_tx_wqe *nopwqe;
-
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-
set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
- sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
- nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+ mlx5e_trigger_irq(&rq->channel->icosq);
}
static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
{
- u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.ico_wqe)),
dseg->lkey = sq->mkey_be;
wi->num_wqebbs = 1;
- wi->num_ds = 1;
+ wi->num_pkts = 1;
}
}
byte_count += MLX5E_METADATA_ETHER_LEN;
#endif
- if (mlx5e_rx_is_linear_skb(mdev, params)) {
+ if (mlx5e_rx_is_linear_skb(params)) {
int frag_stride;
frag_stride = mlx5e_rx_get_linear_frag_sz(params);
return order_base_2(sz);
}
+/* Return the log_wq_sz field of the wq section embedded in the RQ context
+ * pointed to by @rqc.
+ */
+static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
+{
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ return MLX5_GET(wq, wq, log_wq_sz);
+}
+
static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_params *params,
struct mlx5e_rq_param *param)
param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
}
+/* Choose the log2 work-queue size that is handed to the ICOSQ parameters.
+ *
+ * @params: channel parameters; only rq_wq_type is read.
+ * @rqp:    already-built RQ parameters; rqc supplies the RQ's log_wq_sz.
+ *
+ * Striding RQ: order_base_2(MLX5E_UMR_WQEBBS) + the RQ's log_wq_sz, i.e. the
+ * RQ size multiplied by the (power-of-two rounded) amount by which the ICOSQ
+ * producer counter advances per UMR WQE.
+ * Any other type: MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE.
+ */
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp)
+{
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return order_base_2(MLX5E_UMR_WQEBBS) +
+ mlx5e_get_rq_log_wq_sz(rqp->rqc);
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+ }
+}
+
static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
{
- u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+ u8 icosq_log_wq_sz;
mlx5e_build_rq_param(priv, params, &cparam->rq);
+
+ icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
+
mlx5e_build_sq_param(priv, params, &cparam->sq);
mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
mlx5e_activate_channel(chs->c[i]);
}
+#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
+
+/* Call mlx5e_wait_for_min_rx_wqes() on the RQ of every channel in @chs.
+ *
+ * The full MLX5E_RQ_WQES_TIMEOUT is used only until the first failure; once
+ * err is non-zero the remaining RQs are checked with a timeout of 0.
+ *
+ * Return: 0 if every call returned 0, -ETIMEDOUT otherwise.
+ */
static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
{
int err = 0;
int i;
- for (i = 0; i < chs->num; i++)
- err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq,
- err ? 0 : 20000);
+ for (i = 0; i < chs->num; i++) {
+ int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
+
+ err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
+ }
return err ? -ETIMEDOUT : 0;
}
MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
- (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
}
return 0;
}
+/* Set the min_mtu/max_mtu limits of the netdev that belongs to @priv.
+ *
+ * min_mtu is ETH_MIN_MTU. max_mtu is the value reported by
+ * mlx5_query_port_max_mtu(), converted with MLX5E_HW2SW_MTU() using the
+ * current channel parameters and capped at ETH_MAX_MTU.
+ */
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ /* MTU range: 68 - hw-specific max */
+ netdev->min_mtu = ETH_MIN_MTU;
+
+ /* NOTE(review): the literal 1 is presumably the port number - confirm */
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ /* compare as unsigned int so the converted value is capped, not truncated */
+ netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
+ ETH_MAX_MTU);
+}
+
static void mlx5e_netdev_set_tcs(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
return 0;
}
+/* Re-create the channels of @priv with unchanged parameters.
+ *
+ * A zero-initialised struct mlx5e_channels receives a copy of the current
+ * parameters and is passed to mlx5e_safe_switch_channels() with a NULL third
+ * argument.
+ *
+ * Return: whatever mlx5e_safe_switch_channels() returns.
+ */
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channels new_channels = {};
+
+ new_channels.params = priv->channels.params;
+ return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+}
+
void mlx5e_timestamp_init(struct mlx5e_priv *priv)
{
priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
new_channels.params.sw_mtu = new_mtu;
if (params->xdp_prog &&
- !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+ !mlx5e_rx_is_linear_skb(&new_channels.params)) {
netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, MLX5E_XDP_MAX_MTU);
+ new_mtu, mlx5e_xdp_max_mtu(params));
err = -EINVAL;
goto out;
}
if (!report_failed)
goto unlock;
- mlx5e_close_locked(priv->netdev);
- err = mlx5e_open_locked(priv->netdev);
+ err = mlx5e_safe_reopen_channels(priv);
if (err)
netdev_err(priv->netdev,
- "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+ "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
err);
unlock:
new_channels.params = priv->channels.params;
new_channels.params.xdp_prog = prog;
- if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+ if (!mlx5e_rx_is_linear_skb(&new_channels.params)) {
netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
- new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+ new_channels.params.sw_mtu,
+ mlx5e_xdp_max_mtu(&new_channels.params));
return -EINVAL;
}
mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
if (was_opened && reset)
- mlx5e_open_locked(netdev);
+ err = mlx5e_open_locked(netdev);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
goto unlock;
if (!slow_pci_heuristic(mdev) &&
mlx5e_striding_rq_possible(mdev, params) &&
(mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
- !mlx5e_rx_is_linear_skb(mdev, params)))
+ !mlx5e_rx_is_linear_skb(params)))
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
{
enum mlx5e_traffic_types tt;
- rss_params->hfunc = ETH_RSS_HASH_XOR;
+ rss_params->hfunc = ETH_RSS_HASH_TOP;
netdev_rss_key_fill(rss_params->toeplitz_hash_key,
sizeof(rss_params->toeplitz_hash_key));
mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
mlx5e_init_l2_addr(priv);
if (!netif_running(netdev))
mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
- /* MTU range: 68 - hw-specific max */
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_netdev_mtu_boundaries(priv);
mlx5e_set_dev_port_mtu(priv);
mlx5_lag_add(mdev, netdev);
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev))
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !is_vlan_dev(netdev))
return NOTIFY_OK;
switch (event) {
static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
{
- struct net_device *netdev = priv->netdev;
- struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
-
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_netdev_mtu_boundaries(priv);
}
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
mlx5e_page_release(rq, &dma_info[i], recycle);
}
-static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
{
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
- struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
- rq->mpwqe.umr_in_progress = false;
+ do {
+ u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head);
- mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
+ mlx5_wq_ll_push(wq, next_wqe_index);
+ } while (--n);
/* ensure wqes are visible to device before updating doorbell record */
dma_wmb();
static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
{
- return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+ return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc);
}
static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
wi->consumed_strides = 0;
- rq->mpwqe.umr_in_progress = true;
-
umr_wqe->ctrl.opmod_idx_opcode =
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
MLX5_OPCODE_UMR);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
sq->pc += MLX5E_UMR_WQEBBS;
- mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
+
+ sq->doorbell_cseg = &umr_wqe->ctrl;
return 0;
return !!err;
}
-static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
- struct mlx5e_icosq *sq,
- struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
- struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
-
- mlx5_cqwq_pop(&cq->wq);
-
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
- netdev_WARN_ONCE(cq->channel->netdev,
- "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
- return;
- }
-
- if (likely(icowi->opcode == MLX5_OPCODE_UMR)) {
- mlx5e_post_rx_mpwqe(rq);
- return;
- }
-
- if (unlikely(icowi->opcode != MLX5_OPCODE_NOP))
- netdev_WARN_ONCE(cq->channel->netdev,
- "Bad OPCODE in ICOSQ WQE info: 0x%x\n", icowi->opcode);
-}
-
static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
struct mlx5_cqe64 *cqe;
+ u8 completed_umr = 0;
+ u16 sqcc;
+ int i;
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
return;
if (likely(!cqe))
return;
- /* by design, there's only a single cqe */
- mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe);
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ i = 0;
+ do {
+ u16 wqe_counter;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(cq->channel->netdev,
+ "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
+ break;
+ }
+ do {
+ struct mlx5e_sq_wqe_info *wi;
+ u16 ci;
+
+ last_wqe = (sqcc == wqe_counter);
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.ico_wqe[ci];
+
+ if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
+ sqcc += MLX5E_UMR_WQEBBS;
+ completed_umr++;
+ } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
+ sqcc++;
+ } else {
+ netdev_WARN_ONCE(cq->channel->netdev,
+ "Bad OPCODE in ICOSQ WQE info: 0x%x\n",
+ wi->opcode);
+ }
+
+ } while (!last_wqe);
+
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ sq->cc = sqcc;
mlx5_cqwq_update_db_record(&cq->wq);
+
+ if (likely(completed_umr)) {
+ mlx5e_post_rx_mpwqe(rq, completed_umr);
+ rq->mpwqe.umr_in_progress -= completed_umr;
+ }
}
+/* Refill a striding RQ by posting UMR WQEs in bulk on the channel's ICOSQ.
+ *
+ * Does nothing unless MLX5E_RQ_STATE_ENABLED is set. Otherwise it polls the
+ * ICOSQ completion queue, and when at least UMR_WQE_BULK RQ entries are
+ * missing beyond those already covered by in-flight UMRs, it allocates them
+ * starting at rq->mpwqe.actual_wq_head and rings the ICOSQ doorbell once for
+ * the whole batch.
+ *
+ * Return: always false.
+ */
bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
{
+ struct mlx5e_icosq *sq = &rq->channel->icosq;
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ u8 missing, i;
+ u16 head;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return false;
- mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq);
+ mlx5e_poll_ico_cq(&sq->cq, rq);
+
+ /* RQ entries that are missing and not yet covered by an in-flight UMR */
+ missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
- if (mlx5_wq_ll_is_full(wq))
+ /* more UMRs outstanding than the last batch posted: count as congestion */
+ if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
+ rq->stats->congst_umr++;
+
+#define UMR_WQE_BULK (2)
+ if (likely(missing < UMR_WQE_BULK))
return false;
- if (!rq->mpwqe.umr_in_progress)
- mlx5e_alloc_rx_mpwqe(rq, wq->head);
- else
- rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2;
+ head = rq->mpwqe.actual_wq_head;
+ i = missing;
+ /* allocate up to 'missing' entries; stop at the first allocation failure */
+ do {
+ if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head)))
+ break;
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ } while (--i);
+
+ /* i is what was left to do, so this is the number actually allocated */
+ rq->mpwqe.umr_last_bulk = missing - i;
+ /* doorbell_cseg is non-NULL only if something was queued; ring once */
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+
+ rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
+ rq->mpwqe.actual_wq_head = head;
return false;
}
{
*proto = ((struct ethhdr *)skb->data)->h_proto;
*proto = __vlan_get_protocol(skb, *proto, network_depth);
- return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
+
+ if (*proto == htons(ETH_P_IP))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+ if (*proto == htons(ETH_P_IPV6))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+ return false;
}
static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
rq->stats->ecn_mark += !!rc;
}
-static u32 mlx5e_get_fcs(const struct sk_buff *skb)
-{
- const void *fcs_bytes;
- u32 _fcs_bytes;
-
- fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
- ETH_FCS_LEN, &_fcs_bytes);
-
- return __get_unaligned_cpu32(fcs_bytes);
-}
-
static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
{
void *ip_p = skb->data + network_depth;
#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+#define MAX_PADDING 8
+
+/* Slow path: add the checksum of @len bytes starting at @offset of @skb to
+ * skb->csum, computing it with skb_checksum(), and count the event in
+ * stats->csum_complete_tail_slow.
+ */
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+ struct mlx5e_rq_stats *stats)
+{
+ stats->csum_complete_tail_slow++;
+ skb->csum = csum_block_add(skb->csum,
+ skb_checksum(skb, offset, len, 0),
+ offset);
+}
+
+/* Add the checksum of everything from @offset to the end of @skb to
+ * skb->csum.
+ *
+ * Tails of at most MAX_PADDING bytes are fetched with skb_header_pointer()
+ * (using an on-stack bounce buffer) and summed with csum_partial(); this is
+ * counted in stats->csum_complete_tail. Longer tails, or a failed fetch, go
+ * through tail_padding_csum_slow().
+ */
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+ struct mlx5e_rq_stats *stats)
+{
+ u8 tail_padding[MAX_PADDING];
+ int len = skb->len - offset;
+ void *tail;
+
+ /* too long for the on-stack buffer */
+ if (unlikely(len > MAX_PADDING)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ tail = skb_header_pointer(skb, offset, len, tail_padding);
+ if (unlikely(!tail)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ stats->csum_complete_tail++;
+ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+/* Fold the bytes that follow the IP packet inside @skb into skb->csum.
+ *
+ * @network_depth: offset of the IP header from skb->data.
+ * @proto:         ethertype in network byte order; only ETH_P_IP and
+ *                 ETH_P_IPV6 are handled, anything else is a no-op.
+ *
+ * The end of the IP packet is derived from the header's length field
+ * (tot_len for IPv4, header size + payload_len for IPv6). If the skb is not
+ * longer than that, nothing is done.
+ * NOTE(review): the IP header is read straight from skb->data, so the caller
+ * presumably guarantees it is in the linear area - confirm.
+ */
+static void
+mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip4;
+ int pkt_len;
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ ip4 = (struct iphdr *)(skb->data + network_depth);
+ pkt_len = network_depth + ntohs(ip4->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+ break;
+ default:
+ return;
+ }
+
+ /* nothing after the IP packet */
+ if (likely(pkt_len >= skb->len))
+ return;
+
+ tail_padding_csum(skb, pkt_len, stats);
+}
+
static inline void mlx5e_handle_csum(struct net_device *netdev,
struct mlx5_cqe64 *cqe,
struct mlx5e_rq *rq,
return;
}
- if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)))
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
goto csum_unnecessary;
/* CQE csum doesn't cover padding octets in short ethernet
skb->csum = csum_partial(skb->data + ETH_HLEN,
network_depth - ETH_HLEN,
skb->csum);
- if (unlikely(netdev->features & NETIF_F_RXFCS))
- skb->csum = csum_block_add(skb->csum,
- (__force __wsum)mlx5e_get_fcs(skb),
- skb->len - ETH_FCS_LEN);
+
+ mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
stats->csum_complete++;
return;
}
csum_unnecessary:
if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
- ((cqe->hds_ip_ext & CQE_L4_OK) ||
- (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) {
+ (cqe->hds_ip_ext & CQE_L4_OK))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (cqe_is_tunneled(cqe)) {
skb->csum_level = 1;
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
s->rx_xdp_drop += rq_stats->xdp_drop;
s->rx_xdp_redirect += rq_stats->xdp_redirect;
s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
+ s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
+ s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
s->rx_xdp_tx_full += xdpsq_stats->full;
s->rx_xdp_tx_err += xdpsq_stats->err;
s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
- s->rx_page_reuse += rq_stats->page_reuse;
s->rx_cache_reuse += rq_stats->cache_reuse;
s->rx_cache_full += rq_stats->cache_full;
s->rx_cache_empty += rq_stats->cache_empty;
s->ch_eq_rearm += ch_stats->eq_rearm;
/* xdp redirect */
s->tx_xdp_xmit += xdpsq_red_stats->xmit;
+ s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
+ s->tx_xdp_inlnw += xdpsq_red_stats->inlnw;
s->tx_xdp_full += xdpsq_red_stats->full;
s->tx_xdp_err += xdpsq_red_stats->err;
s->tx_xdp_cqes += xdpsq_red_stats->cqes;
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
- { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
static const struct counter_desc rq_xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
static const struct counter_desc xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
u64 rx_csum_unnecessary;
u64 rx_csum_none;
u64 rx_csum_complete;
+ u64 rx_csum_complete_tail;
+ u64 rx_csum_complete_tail_slow;
u64 rx_csum_unnecessary_inner;
u64 rx_xdp_drop;
u64 rx_xdp_redirect;
u64 rx_xdp_tx_xmit;
+ u64 rx_xdp_tx_mpwqe;
+ u64 rx_xdp_tx_inlnw;
u64 rx_xdp_tx_full;
u64 rx_xdp_tx_err;
u64 rx_xdp_tx_cqe;
u64 tx_queue_wake;
u64 tx_cqe_err;
u64 tx_xdp_xmit;
+ u64 tx_xdp_mpwqe;
+ u64 tx_xdp_inlnw;
u64 tx_xdp_full;
u64 tx_xdp_err;
u64 tx_xdp_cqes;
u64 rx_buff_alloc_err;
u64 rx_cqe_compress_blks;
u64 rx_cqe_compress_pkts;
- u64 rx_page_reuse;
u64 rx_cache_reuse;
u64 rx_cache_full;
u64 rx_cache_empty;
u64 packets;
u64 bytes;
u64 csum_complete;
+ u64 csum_complete_tail;
+ u64 csum_complete_tail_slow;
u64 csum_unnecessary;
u64 csum_unnecessary_inner;
u64 csum_none;
u64 buff_alloc_err;
u64 cqe_compress_blks;
u64 cqe_compress_pkts;
- u64 page_reuse;
u64 cache_reuse;
u64 cache_full;
u64 cache_empty;
struct mlx5e_xdpsq_stats {
u64 xmit;
+ u64 mpwqe;
+ u64 inlnw;
u64 full;
u64 err;
/* dirtied @completion */
return 0;
}
+/* Return the address of the header set inside spec->match_criteria that
+ * applies for @flags: inner_headers when MLX5_FLOW_CONTEXT_ACTION_DECAP is
+ * set, outer_headers otherwise.
+ */
+static void *get_match_headers_criteria(u32 flags,
+ struct mlx5_flow_spec *spec)
+{
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+ MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ inner_headers) :
+ MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+}
+
+/* Return the address of the header set inside spec->match_value that applies
+ * for @flags: inner_headers when MLX5_FLOW_CONTEXT_ACTION_DECAP is set,
+ * outer_headers otherwise.
+ */
+static void *get_match_headers_value(u32 flags,
+ struct mlx5_flow_spec *spec)
+{
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+ MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ inner_headers) :
+ MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+}
+
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f,
/* In decap flow, header pointers should point to the inner
* headers, outer header were already set by parse_tunnel_attr
*/
- headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- inner_headers);
+ headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
+ spec);
+ headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
+ spec);
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
if (match.mask->n_proto)
*match_level = MLX5_MATCH_L2;
}
-
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
+ is_vlan_dev(filter_dev)) {
+ struct flow_dissector_key_vlan filter_dev_mask;
+ struct flow_dissector_key_vlan filter_dev_key;
struct flow_match_vlan match;
- flow_rule_match_vlan(rule, &match);
+ if (is_vlan_dev(filter_dev)) {
+ match.key = &filter_dev_key;
+ match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
+ match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
+ match.key->vlan_priority = 0;
+ match.mask = &filter_dev_mask;
+ memset(match.mask, 0xff, sizeof(*match.mask));
+ match.mask->vlan_priority = 0;
+ } else {
+ flow_rule_match_vlan(rule, &match);
+ }
if (match.mask->vlan_id ||
match.mask->vlan_priority ||
match.mask->vlan_tpid) {
u8 field;
u8 size;
u32 offset;
+ u32 match_offset;
};
-#define OFFLOAD(fw_field, size, field, off) \
- {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
+#define OFFLOAD(fw_field, size, field, off, match_field) \
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
+ offsetof(struct pedit_headers, field) + (off), \
+ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
+
+/* Check whether a masked value equals a masked match value.
+ *
+ * Compares (*valp & *maskp) with (*matchvalp & *matchmaskp), reading each
+ * operand as a u8, u16 or u32 according to @size.
+ *
+ * Return: true if the masked values are equal; false if they differ or if
+ * @size is not 1, 2 or 4.
+ * NOTE(review): the pointers are dereferenced directly at the given width,
+ * so callers presumably pass suitably aligned addresses - confirm.
+ */
+static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
+ void *matchmaskp, int size)
+{
+ bool same = false;
+
+ switch (size) {
+ case sizeof(u8):
+ same = ((*(u8 *)valp) & (*(u8 *)maskp)) ==
+ ((*(u8 *)matchvalp) & (*(u8 *)matchmaskp));
+ break;
+ case sizeof(u16):
+ same = ((*(u16 *)valp) & (*(u16 *)maskp)) ==
+ ((*(u16 *)matchvalp) & (*(u16 *)matchmaskp));
+ break;
+ case sizeof(u32):
+ same = ((*(u32 *)valp) & (*(u32 *)maskp)) ==
+ ((*(u32 *)matchvalp) & (*(u32 *)matchmaskp));
+ break;
+ }
+
+ return same;
+}
static struct mlx5_fields fields[] = {
- OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
- OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0),
- OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
- OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0),
- OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0),
- OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0),
-
- OFFLOAD(IP_TTL, 1, ip4.ttl, 0),
- OFFLOAD(SIPV4, 4, ip4.saddr, 0),
- OFFLOAD(DIPV4, 4, ip4.daddr, 0),
-
- OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
- OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0),
- OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0),
- OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0),
- OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
- OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0),
- OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0),
- OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0),
- OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
-
- OFFLOAD(TCP_SPORT, 2, tcp.source, 0),
- OFFLOAD(TCP_DPORT, 2, tcp.dest, 0),
- OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
-
- OFFLOAD(UDP_SPORT, 2, udp.source, 0),
- OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
+ OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
+ OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0, dmac_15_0),
+ OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
+ OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0, smac_15_0),
+ OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0, ethertype),
+ OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0, first_vid),
+
+ OFFLOAD(IP_TTL, 1, ip4.ttl, 0, ttl_hoplimit),
+ OFFLOAD(SIPV4, 4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ OFFLOAD(DIPV4, 4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+
+ OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
+ OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
+ OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
+ OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
+ OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
+ OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
+ OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
+ OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
+ OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),
+
+ OFFLOAD(TCP_SPORT, 2, tcp.source, 0, tcp_sport),
+ OFFLOAD(TCP_DPORT, 2, tcp.dest, 0, tcp_dport),
+ OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),
+
+ OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
+ OFFLOAD(UDP_DPORT, 2, udp.dest, 0, udp_dport),
};
/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
*/
static int offload_pedit_fields(struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+ void *headers_c = get_match_headers_criteria(*action_flags,
+ &parse_attr->spec);
+ void *headers_v = get_match_headers_value(*action_flags,
+ &parse_attr->spec);
int i, action_size, nactions, max_actions, first, last, next_z;
void *s_masks_p, *a_masks_p, *vals_p;
struct mlx5_fields *f;
nactions = parse_attr->num_mod_hdr_actions;
for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ bool skip;
+
f = &fields[i];
/* avoid seeing bits set from previous iterations */
s_mask = 0;
return -EOPNOTSUPP;
}
+ skip = false;
if (s_mask) {
+ void *match_mask = headers_c + f->match_offset;
+ void *match_val = headers_v + f->match_offset;
+
cmd = MLX5_ACTION_TYPE_SET;
mask = s_mask;
vals_p = (void *)set_vals + f->offset;
+ /* don't rewrite if we have a match on the same value */
+ if (cmp_val_mask(vals_p, s_masks_p, match_val,
+ match_mask, f->size))
+ skip = true;
/* clear to denote we consumed this field */
memset(s_masks_p, 0, f->size);
} else {
+ u32 zero = 0;
+
cmd = MLX5_ACTION_TYPE_ADD;
mask = a_mask;
vals_p = (void *)add_vals + f->offset;
+ /* add 0 is no change */
+ if (!memcmp(vals_p, &zero, f->size))
+ skip = true;
/* clear to denote we consumed this field */
memset(a_masks_p, 0, f->size);
}
+ if (skip)
+ continue;
field_bsize = f->size * BITS_PER_BYTE;
return 0;
}
+/* Return the device's max_modify_header_actions capability for @namespace:
+ * the eswitch FDB flow-table capability when @namespace is
+ * MLX5_FLOW_NAMESPACE_FDB, the NIC RX flow-table capability for any other
+ * value.
+ */
+static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
+ int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
struct pedit_headers_action *hdrs,
int namespace,
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
- if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
- max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
- else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
- max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
-
+ max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
/* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
max_actions = min(max_actions, nkeys * 16);
goto out_err;
}
+ if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "The pedit offload action is not supported");
+ goto out_err;
+ }
+
mask = act->mangle.mask;
val = act->mangle.val;
offset = act->mangle.offset;
static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct pedit_headers_action *hdrs,
+ u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *cmd_masks;
goto out_err;
}
- err = offload_pedit_fields(hdrs, parse_attr, extack);
+ err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
if (err < 0)
goto out_dealloc_parsed_actions;
u8 ip_proto;
int i;
- if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
- else
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
-
+ headers_v = get_match_headers_value(actions, spec);
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */
actions = flow->nic_attr->action;
if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
- !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
+ (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
.mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
.mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
};
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
int err;
- if (act->vlan.prio) {
- NL_SET_ERR_MSG_MOD(extack, "Setting VLAN prio is not supported");
+ headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing VLAN prio is not supported");
return -EOPNOTSUPP;
}
}
break;
default:
- return -EINVAL;
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
+ return -EOPNOTSUPP;
}
}
if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr, hdrs, extack);
+ parse_attr, hdrs, &action, extack);
if (err)
return err;
+ /* in case all pedit actions are skipped, remove the MOD_HDR
+ * flag.
+ */
+ if (parse_attr->num_mod_hdr_actions == 0)
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
attr->action = action;
return 0;
}
+/*
+ * Emit a VLAN push action for the VLAN device in *@out_dev, then advance
+ * *@out_dev to the device it is stacked on (looked up through its iflink).
+ * Recurses while that lower device is itself a VLAN device, so stacked
+ * VLANs produce one push action per level.
+ *
+ * Returns 0 on success, the error from parse_tc_vlan_action(), or -ENODEV
+ * when the lower device cannot be found. *@out_dev is only updated after a
+ * successful lookup.
+ */
+static int add_vlan_push_action(struct mlx5e_priv *priv,
+				struct mlx5_esw_flow_attr *attr,
+				struct net_device **out_dev,
+				u32 *action)
+{
+	struct net_device *vlan_dev = *out_dev;
+	struct flow_action_entry vlan_act = {
+		.id = FLOW_ACTION_VLAN_PUSH,
+		.vlan.vid = vlan_dev_vlan_id(vlan_dev),
+		.vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+		.vlan.prio = 0,
+	};
+	struct net_device *lower_dev;
+	int err;
+
+	err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+	if (err)
+		return err;
+
+	lower_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
+					 dev_get_iflink(vlan_dev));
+	/* The lookup yields NULL when no device has that ifindex; bail out
+	 * rather than handing NULL to is_vlan_dev().
+	 */
+	if (!lower_dev)
+		return -ENODEV;
+
+	*out_dev = lower_dev;
+	if (is_vlan_dev(lower_dev))
+		err = add_vlan_push_action(priv, attr, out_dev, action);
+
+	return err;
+}
+
+/*
+ * Emit one VLAN pop action per encapsulation level of the filter device,
+ * as reported by vlan_get_encap_level(). Stops at, and returns, the first
+ * error from parse_tc_vlan_action(); returns 0 otherwise.
+ */
+static int add_vlan_pop_action(struct mlx5e_priv *priv,
+			       struct mlx5_esw_flow_attr *attr,
+			       u32 *action)
+{
+	struct flow_action_entry pop_act = {
+		.id = FLOW_ACTION_VLAN_POP,
+	};
+	int remaining = vlan_get_encap_level(attr->parse_attr->filter_dev);
+	int err;
+
+	for (; remaining; remaining--) {
+		err = parse_tc_vlan_action(priv, &pop_act, attr, action);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
const struct ip_tunnel_info *info = NULL;
const struct flow_action_entry *act;
uplink_upper == out_dev)
out_dev = uplink_dev;
+ if (is_vlan_dev(out_dev)) {
+ err = add_vlan_push_action(priv, attr,
+ &out_dev,
+ &action);
+ if (err)
+ return err;
+ }
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = add_vlan_pop_action(priv, attr,
+ &action);
+ if (err)
+ return err;
+ }
+
if (!mlx5e_eswitch_rep(out_dev))
return -EOPNOTSUPP;
out_dev->ifindex;
parse_attr->tun_info[attr->out_count] = *info;
encap = false;
- attr->parse_attr = parse_attr;
attr->dests[attr->out_count].flags |=
MLX5_ESW_DEST_ENCAP;
attr->out_count++;
break;
}
default:
- return -EINVAL;
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
+ return -EOPNOTSUPP;
}
}
if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr, hdrs, extack);
+ parse_attr, hdrs, &action, extack);
if (err)
return err;
+ /* in case all pedit actions are skipped, remove the MOD_HDR
+ * flag. we might have set split_count either by pedit or
+ * pop/push. if there is no pop/push either, reset it too.
+ */
+ if (parse_attr->num_mod_hdr_actions == 0) {
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+ (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
+ attr->split_count = 0;
+ }
}
attr->action = action;
if (err)
goto err_free;
- err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack);
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
net_dim(&rq->dim, dim_sample);
}
+/*
+ * Post a NOP WQE on the ICO send queue @sq and notify the hardware about
+ * it. NOTE(review): presumably the completion of that NOP is what raises
+ * the interrupt the function name refers to - confirm against the CQ setup.
+ */
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5e_tx_wqe *nop;
+	u16 slot;
+
+	/* Tag the slot before mlx5e_post_nop() advances sq->pc. */
+	slot = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	sq->db.ico_wqe[slot].opcode = MLX5_OPCODE_NOP;
+
+	nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
+}
+
int mlx5e_napi_poll(struct napi_struct *napi, int budget)
{
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
__raw_writel((__force u32)cpu_to_be32(val), addr);
/* We still want ordering, just not swabbing, so add a barrier */
- mb();
+ wmb();
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
#include "ecpf.h"
#include "lib/eq.h"
-enum {
- FDB_FAST_PATH = 0,
- FDB_SLOW_PATH
-};
-
/* There are two match-all miss flows, one for unicast dst mac and
* one for multicast.
*/
{
int err;
- mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
-
err = esw_offloads_steering_init(esw, total_nvports);
if (err)
return err;
static int any_notifier(struct notifier_block *, unsigned long, void *);
static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
+static int pcie_core(struct notifier_block *, unsigned long, void *);
/* handler which forwards the event to events->nh, driver notifiers */
static int forward_event(struct notifier_block *, unsigned long, void *);
{.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
{.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
{.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+ {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
struct mlx5_events {
struct mlx5_core_dev *dev;
+ struct workqueue_struct *wq; /* created with create_singlethread_workqueue(); pcie_core() queues pcie_core_work on it */
struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
/* driver notifier chain */
struct atomic_notifier_head nh;
/* port module events stats */
struct mlx5_pme_stats pme_stats;
+ /* work item that runs mlx5_pcie_event(); queued by pcie_core() */
+ struct work_struct pcie_core_work;
};
static const char *eqe_type_str(u8 type)
return NOTIFY_OK;
}
+enum { /* values of the mpein_reg pwr_status field, as decoded by mlx5_pcie_event() */
+ MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0, /* logged as "power capability was not advertised" */
+ MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1, /* logged at info level together with pci_power */
+ MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2, /* logged at warn level together with pci_power */
+};
+
+/*
+ * Work handler behind pcie_core_work: reads the MPEIN register and logs
+ * the PCIe slot power status it reports. Returns without doing anything
+ * when the device lacks the pci_status_and_power MCAM feature, and logs a
+ * warning (without decoding anything) when the register access fails.
+ */
+static void mlx5_pcie_event(struct work_struct *work)
+{
+	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+	struct mlx5_events *events;
+	struct mlx5_core_dev *dev;
+	u8 power_status;
+	u16 pci_power;
+	int err;
+
+	events = container_of(work, struct mlx5_events, pcie_core_work);
+	dev = events->dev;
+
+	if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
+		return;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MPEIN, 0, 0);
+	if (err) {
+		/* out does not hold a valid register image; decoding it
+		 * would report MLX5_PCI_POWER_COULD_NOT_BE_READ for a
+		 * register that was never read.
+		 */
+		mlx5_core_warn_rl(dev, "MPEIN register access failed (%d)\n",
+				  err);
+		return;
+	}
+
+	power_status = MLX5_GET(mpein_reg, out, pwr_status);
+	pci_power = MLX5_GET(mpein_reg, out, pci_power);
+
+	switch (power_status) {
+	case MLX5_PCI_POWER_COULD_NOT_BE_READ:
+		mlx5_core_info_rl(dev,
+				  "PCIe slot power capability was not advertised.\n");
+		break;
+	case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
+		mlx5_core_warn_rl(dev,
+				  "Detected insufficient power on the PCIe slot (%uW).\n",
+				  pci_power);
+		break;
+	case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
+		mlx5_core_info_rl(dev,
+				  "PCIe slot advertised sufficient power (%uW).\n",
+				  pci_power);
+		break;
+	}
+}
+
+/*
+ * Notifier callback registered for MLX5_EVENT_TYPE_GENERAL_EVENT. For the
+ * PCI power change sub-type it queues pcie_core_work on the events
+ * workqueue and returns NOTIFY_OK; any other sub-type yields NOTIFY_DONE.
+ */
+static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
+{
+	struct mlx5_event_nb *event_nb;
+	struct mlx5_events *events;
+	struct mlx5_eqe *eqe = data;
+
+	if (eqe->sub_type != MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT)
+		return NOTIFY_DONE;
+
+	event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+	events = event_nb->ctx;
+	queue_work(events->wq, &events->pcie_core_work);
+
+	return NOTIFY_OK;
+}
+
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
{
*stats = dev->priv.events->pme_stats;
ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
events->dev = dev;
dev->priv.events = events;
+ /* Workqueue on which pcie_core() queues pcie_core_work. */
+ events->wq = create_singlethread_workqueue("mlx5_events");
+ if (!events->wq) {
+ /* Release events here (with kvfree(), as mlx5_events_cleanup()
+ * does) and clear the pointer published above so that it does
+ * not dangle after the failed init.
+ */
+ dev->priv.events = NULL;
+ kvfree(events);
+ return -ENOMEM;
+ }
+ INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
+
return 0;
}
void mlx5_events_cleanup(struct mlx5_core_dev *dev)
{
+ destroy_workqueue(dev->priv.events->wq);
kvfree(dev->priv.events);
}
for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+ flush_workqueue(events->wq);
}
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
*conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
/* Make sure that doorbell record is visible before ringing */
wmb();
- mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+ mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET);
}
static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
#include <linux/mlx5/eq.h>
+#include "mlx5_core.h"
#include "lib/eq.h"
#include "fpga/cmd.h"
};
#define mlx5_fpga_dbg(__adev, format, ...) \
- dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, ##__VA_ARGS__)
+ mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
#define mlx5_fpga_err(__adev, format, ...) \
- dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, ##__VA_ARGS__)
+ mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
#define mlx5_fpga_warn(__adev, format, ...) \
- dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, ##__VA_ARGS__)
+ mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+/* Rate-limited FPGA warning, prefixed with the caller's function and line.
+ * Kept at warn level, like the dev_warn_ratelimited() call it replaces.
+ */
#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
- dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \
- format, __func__, __LINE__, ##__VA_ARGS__)
+ mlx5_core_warn_rl((__adev)->mdev, "FPGA: %s:%d: " \
+ format, __func__, __LINE__, ##__VA_ARGS__)
#define mlx5_fpga_notice(__adev, format, ...) \
- dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+ mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
#define mlx5_fpga_info(__adev, format, ...) \
- dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+ mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
return ret;
}
-static void mlx5_fpga_tls_release_swid(struct idr *idr,
- spinlock_t *idr_spinlock, u32 swid)
+static void *mlx5_fpga_tls_release_swid(struct idr *idr,
+ spinlock_t *idr_spinlock, u32 swid) /* removes swid from idr and returns what idr_remove() returned for it */
{
unsigned long flags;
+ void *ptr; /* result of idr_remove(); callers in this file treat NULL as "no flow for this swid" */
spin_lock_irqsave(idr_spinlock, flags);
- idr_remove(idr, swid);
+ ptr = idr_remove(idr, swid); /* done with idr_spinlock held via spin_lock_irqsave() */
spin_unlock_irqrestore(idr_spinlock, flags);
+ return ptr;
}
static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
kfree(buf);
}
-struct mlx5_teardown_stream_context {
- struct mlx5_fpga_tls_command_context cmd;
- u32 swid;
-};
-
static void
mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
struct mlx5_fpga_device *fdev,
struct mlx5_fpga_tls_command_context *cmd,
struct mlx5_fpga_dma_buf *resp)
{
- struct mlx5_teardown_stream_context *ctx =
- container_of(cmd, struct mlx5_teardown_stream_context, cmd);
-
if (resp) {
u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
mlx5_fpga_err(fdev,
"Teardown stream failed with syndrome = %d",
syndrome);
- else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
- mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
- &fdev->tls->tx_idr_spinlock,
- ctx->swid);
- else
- mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
- &fdev->tls->rx_idr_spinlock,
- ctx->swid);
}
mlx5_fpga_tls_put_command_ctx(cmd);
}
void *cmd;
int ret;
- rcu_read_lock();
- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- rcu_read_unlock();
-
- if (!flow) {
- WARN_ONCE(1, "Received NULL pointer for handle\n");
- return -EINVAL;
- }
-
buf = kzalloc(size, GFP_ATOMIC);
if (!buf)
return -ENOMEM;
cmd = (buf + 1);
+ rcu_read_lock();
+ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+ if (unlikely(!flow)) {
+ rcu_read_unlock();
+ WARN_ONCE(1, "Received NULL pointer for handle\n");
+ kfree(buf);
+ return -EINVAL;
+ }
mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ rcu_read_unlock();
MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
void *flow, u32 swid, gfp_t flags)
{
- struct mlx5_teardown_stream_context *ctx;
+ struct mlx5_fpga_tls_command_context *ctx;
struct mlx5_fpga_dma_buf *buf;
void *cmd;
if (!ctx)
return;
- buf = &ctx->cmd.buf;
+ buf = &ctx->buf;
cmd = (ctx + 1);
MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
MLX5_SET(tls_cmd, cmd, swid, swid);
buf->sg[0].data = cmd;
buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- ctx->swid = swid;
- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
mlx5_fpga_tls_teardown_completion);
}
struct mlx5_fpga_tls *tls = mdev->fpga->tls;
void *flow;
- rcu_read_lock();
if (direction_sx)
- flow = idr_find(&tls->tx_idr, swid);
+ flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock,
+ swid);
else
- flow = idr_find(&tls->rx_idr, swid);
-
- rcu_read_unlock();
+ flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock,
+ swid);
if (!flow) {
mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
return;
}
+ synchronize_rcu(); /* before kfree(flow) */
mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
}
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_ft_underlay_qp *uqp;
int min_level = INT_MAX;
- int err;
+ int err = 0;
u32 qpn;
if (root->root_ft)
if (!steering->fdb_sub_ns)
return -ENOMEM;
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
+ 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
+ goto out_err;
+ }
+
levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
- maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0,
+ maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
+ FDB_FAST_PATH,
levels);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
steering->fdb_sub_ns[chain] = ns;
}
- maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
nic_state = mlx5_get_nic_state(dev);
if (nic_state == MLX5_NIC_IFC_INVALID) {
- dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
+ mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n");
return;
}
- dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+ mlx5_core_err(dev, "starting health recovery flow\n");
mlx5_recover_device(dev);
}
if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
schedule_delayed_work(&health->recover_work, recover_delay);
else
- dev_err(&dev->pdev->dev,
- "new health works are not permitted at this stage\n");
+ mlx5_core_err(dev,
+ "new health works are not permitted at this stage\n");
spin_unlock_irqrestore(&health->wq_lock, flags);
}
return;
for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
- dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
+ mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
+ ioread32be(h->assert_var + i));
- dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
- dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+ mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
+ ioread32be(&h->assert_exit_ptr));
+ mlx5_core_err(dev, "assert_callra 0x%08x\n",
+ ioread32be(&h->assert_callra));
sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
- dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str);
- dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
- dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index));
- dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
- dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+ mlx5_core_err(dev, "fw_ver %s\n", fw_str);
+ mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+ mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
+ hsynd_str(ioread8(&h->synd)));
+ mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
fw = ioread32be(&h->fw_ver);
- dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw);
+ mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
}
static unsigned long get_next_poll_jiffies(void)
if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
queue_work(health->wq, &health->work);
else
- dev_err(&dev->pdev->dev,
- "new health works are not permitted at this stage\n");
+ mlx5_core_err(dev, "new health works are not permitted at this stage\n");
spin_unlock_irqrestore(&health->wq_lock, flags);
}
health->prev = count;
if (health->miss_counter == MAX_MISSES) {
- dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
+ mlx5_core_err(dev, "device's health compromised - reached miss count\n");
print_health_info(dev);
}
cancel_delayed_work_sync(&dev->priv.health.recover_work);
}
+/* Flush the device's health workqueue (dev->priv.health.wq). */
+void mlx5_health_flush(struct mlx5_core_dev *dev)
+{
+	flush_workqueue(dev->priv.health.wq);
+}
+
void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
return -ENOMEM;
strcpy(name, "mlx5_health");
- strcat(name, dev_name(&dev->pdev->dev));
+ strcat(name, dev->priv.name);
health->wq = create_singlethread_workqueue(name);
kfree(name);
if (!health->wq)
void *ppriv)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- u16 max_mtu;
int err;
err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
if (err)
return err;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->mtu = max_mtu;
+ mlx5e_set_netdev_mtu_boundaries(priv);
+ netdev->mtu = netdev->max_mtu;
mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
static int set_hca_cap(struct mlx5_core_dev *dev)
{
- struct pci_dev *pdev = dev->pdev;
int err;
err = handle_hca_cap(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap failed\n");
+ mlx5_core_err(dev, "handle_hca_cap failed\n");
goto out;
}
err = handle_hca_cap_atomic(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
goto out;
}
err = handle_hca_cap_odp(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+ mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
goto out;
}
return -EOPNOTSUPP;
}
-static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
+ const struct pci_device_id *id)
{
- struct pci_dev *pdev = dev->pdev;
+ struct mlx5_priv *priv = &dev->priv;
int err = 0;
- pci_set_drvdata(dev->pdev, dev);
- strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
- priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
-
- mutex_init(&priv->pgdir_mutex);
- INIT_LIST_HEAD(&priv->pgdir_list);
- spin_lock_init(&priv->mkey_lock);
+ dev->pdev = pdev;
+ priv->pci_dev_data = id->driver_data;
- mutex_init(&priv->alloc_mutex);
+ pci_set_drvdata(dev->pdev, dev);
+ dev->bar_addr = pci_resource_start(pdev, 0);
priv->numa_node = dev_to_node(&dev->pdev->dev);
- if (mlx5_debugfs_root)
- priv->dbg_root =
- debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root);
-
err = mlx5_pci_enable_device(dev);
if (err) {
- dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
- goto err_dbg;
+ mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
+ return err;
}
err = request_bar(pdev);
if (err) {
- dev_err(&pdev->dev, "error requesting BARs, aborting\n");
+ mlx5_core_err(dev, "error requesting BARs, aborting\n");
goto err_disable;
}
err = set_dma_caps(pdev);
if (err) {
- dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+ mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
goto err_clr_master;
}
pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
- dev->iseg_base = pci_resource_start(dev->pdev, 0);
+ dev->iseg_base = dev->bar_addr;
dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
if (!dev->iseg) {
err = -ENOMEM;
- dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+ mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
goto err_clr_master;
}
release_bar(dev->pdev);
err_disable:
mlx5_pci_disable_device(dev);
-
-err_dbg:
- debugfs_remove(priv->dbg_root);
return err;
}
-static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static void mlx5_pci_close(struct mlx5_core_dev *dev) /* counterpart of mlx5_pci_init(): iounmap vs ioremap of dev->iseg, release_bar vs request_bar, disable vs enable of the PCI device */
{
iounmap(dev->iseg);
pci_clear_master(dev->pdev);
release_bar(dev->pdev);
mlx5_pci_disable_device(dev);
- debugfs_remove_recursive(priv->dbg_root);
}
-static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_init_once(struct mlx5_core_dev *dev)
{
- struct pci_dev *pdev = dev->pdev;
int err;
- priv->devcom = mlx5_devcom_register_device(dev);
- if (IS_ERR(priv->devcom))
- dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n",
- priv->devcom);
+ dev->priv.devcom = mlx5_devcom_register_device(dev);
+ if (IS_ERR(dev->priv.devcom))
+ mlx5_core_err(dev, "failed to register with devcom (0x%p)\n",
+ dev->priv.devcom);
err = mlx5_query_board_id(dev);
if (err) {
- dev_err(&pdev->dev, "query board id failed\n");
+ mlx5_core_err(dev, "query board id failed\n");
goto err_devcom;
}
err = mlx5_eq_table_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize eq\n");
+ mlx5_core_err(dev, "failed to initialize eq\n");
goto err_devcom;
}
err = mlx5_events_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize events\n");
+ mlx5_core_err(dev, "failed to initialize events\n");
goto err_eq_cleanup;
}
err = mlx5_cq_debugfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
+ mlx5_core_err(dev, "failed to initialize cq debugfs\n");
goto err_events_cleanup;
}
err = mlx5_init_rl_table(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init rate limiting\n");
+ mlx5_core_err(dev, "Failed to init rate limiting\n");
goto err_tables_cleanup;
}
err = mlx5_mpfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
+ mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
goto err_rl_cleanup;
}
err = mlx5_eswitch_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
+ mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
goto err_mpfs_cleanup;
}
err = mlx5_sriov_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
+ mlx5_core_err(dev, "Failed to init sriov %d\n", err);
goto err_eswitch_cleanup;
}
err = mlx5_fpga_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+ mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
goto err_sriov_cleanup;
}
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
- bool boot)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
{
- struct pci_dev *pdev = dev->pdev;
int err;
- dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
- mutex_lock(&dev->intf_state_mutex);
- if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
- __func__);
- goto out;
- }
-
- dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
- fw_rev_min(dev), fw_rev_sub(dev));
+ mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
/* Only PFs hold the relevant PCIe information for this query */
if (mlx5_core_is_pf(dev))
pcie_print_link_status(dev->pdev);
- /* on load removing any previous indication of internal error, device is
- * up
- */
- dev->state = MLX5_DEVICE_STATE_UP;
-
/* wait for firmware to accept initialization segments configurations
*/
err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
if (err) {
- dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
- FW_PRE_INIT_TIMEOUT_MILI);
- goto out_err;
+ mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+ FW_PRE_INIT_TIMEOUT_MILI);
+ return err;
}
err = mlx5_cmd_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
- goto out_err;
+ mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
+ return err;
}
err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
if (err) {
- dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
- FW_INIT_TIMEOUT_MILI);
+ mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
+ FW_INIT_TIMEOUT_MILI);
goto err_cmd_cleanup;
}
err = mlx5_core_enable_hca(dev, 0);
if (err) {
- dev_err(&pdev->dev, "enable hca failed\n");
+ mlx5_core_err(dev, "enable hca failed\n");
goto err_cmd_cleanup;
}
err = mlx5_core_set_issi(dev);
if (err) {
- dev_err(&pdev->dev, "failed to set issi\n");
+ mlx5_core_err(dev, "failed to set issi\n");
goto err_disable_hca;
}
err = mlx5_satisfy_startup_pages(dev, 1);
if (err) {
- dev_err(&pdev->dev, "failed to allocate boot pages\n");
+ mlx5_core_err(dev, "failed to allocate boot pages\n");
goto err_disable_hca;
}
err = set_hca_ctrl(dev);
if (err) {
- dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+ mlx5_core_err(dev, "set_hca_ctrl failed\n");
goto reclaim_boot_pages;
}
err = set_hca_cap(dev);
if (err) {
- dev_err(&pdev->dev, "set_hca_cap failed\n");
+ mlx5_core_err(dev, "set_hca_cap failed\n");
goto reclaim_boot_pages;
}
err = mlx5_satisfy_startup_pages(dev, 0);
if (err) {
- dev_err(&pdev->dev, "failed to allocate init pages\n");
+ mlx5_core_err(dev, "failed to allocate init pages\n");
goto reclaim_boot_pages;
}
err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
- dev_err(&pdev->dev, "init hca failed\n");
+ mlx5_core_err(dev, "init hca failed\n");
goto reclaim_boot_pages;
}
err = mlx5_query_hca_caps(dev);
if (err) {
- dev_err(&pdev->dev, "query hca failed\n");
- goto err_stop_poll;
+ mlx5_core_err(dev, "query hca failed\n");
+ goto stop_health;
}
- if (boot) {
- err = mlx5_init_once(dev, priv);
- if (err) {
- dev_err(&pdev->dev, "sw objs init failed\n");
- goto err_stop_poll;
- }
+ return 0;
+
+stop_health:
+ mlx5_stop_health_poll(dev, boot);
+reclaim_boot_pages:
+ mlx5_reclaim_startup_pages(dev);
+err_disable_hca:
+ mlx5_core_disable_hca(dev, 0);
+err_cmd_cleanup:
+ mlx5_cmd_cleanup(dev);
+
+ return err;
+}
+
+/*
+ * Stop health polling and call mlx5_cmd_teardown_hca(); when that
+ * succeeds, reclaim the startup pages, disable the HCA and clean up the
+ * command interface, then return 0. When the teardown command fails, the
+ * error is logged and returned and the remaining steps are skipped.
+ */
+static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
+{
+	int err;
+
+	mlx5_stop_health_poll(dev, boot);
+
+	err = mlx5_cmd_teardown_hca(dev);
+	if (!err) {
+		mlx5_reclaim_startup_pages(dev);
+		mlx5_core_disable_hca(dev, 0);
+		mlx5_cmd_cleanup(dev);
+		return 0;
+	}
+
+	mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
+	return err;
+}
+
+static int mlx5_load(struct mlx5_core_dev *dev)
+{
+ int err;
dev->priv.uar = mlx5_get_uars_page(dev);
if (IS_ERR(dev->priv.uar)) {
- dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+ mlx5_core_err(dev, "Failed allocating uar, aborting\n");
err = PTR_ERR(dev->priv.uar);
- goto err_get_uars;
+ return err;
}
mlx5_events_start(dev);
err = mlx5_eq_table_create(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to create EQs\n");
+ mlx5_core_err(dev, "Failed to create EQs\n");
goto err_eq_table;
}
err = mlx5_fw_tracer_init(dev->tracer);
if (err) {
- dev_err(&pdev->dev, "Failed to init FW tracer\n");
+ mlx5_core_err(dev, "Failed to init FW tracer\n");
goto err_fw_tracer;
}
err = mlx5_fpga_device_start(dev);
if (err) {
- dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+ mlx5_core_err(dev, "fpga device start failed %d\n", err);
goto err_fpga_start;
}
err = mlx5_accel_ipsec_init(dev);
if (err) {
- dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ mlx5_core_err(dev, "IPSec device start failed %d\n", err);
goto err_ipsec_start;
}
err = mlx5_accel_tls_init(dev);
if (err) {
- dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+ mlx5_core_err(dev, "TLS device start failed %d\n", err);
goto err_tls_start;
}
err = mlx5_init_fs(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init flow steering\n");
+ mlx5_core_err(dev, "Failed to init flow steering\n");
goto err_fs;
}
err = mlx5_core_set_hca_defaults(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to set hca defaults\n");
+ mlx5_core_err(dev, "Failed to set hca defaults\n");
goto err_fs;
}
err = mlx5_sriov_attach(dev);
if (err) {
- dev_err(&pdev->dev, "sriov init failed %d\n", err);
+ mlx5_core_err(dev, "sriov init failed %d\n", err);
goto err_sriov;
}
err = mlx5_ec_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init embedded CPU\n");
+ mlx5_core_err(dev, "Failed to init embedded CPU\n");
goto err_ec;
}
- if (mlx5_device_registered(dev)) {
- mlx5_attach_device(dev);
- } else {
- err = mlx5_register_device(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
- goto err_reg_dev;
- }
- }
-
- set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-out:
- mutex_unlock(&dev->intf_state_mutex);
-
return 0;
-err_reg_dev:
- mlx5_ec_cleanup(dev);
-
err_ec:
mlx5_sriov_detach(dev);
-
err_sriov:
mlx5_cleanup_fs(dev);
-
err_fs:
mlx5_accel_tls_cleanup(dev);
-
err_tls_start:
mlx5_accel_ipsec_cleanup(dev);
-
err_ipsec_start:
mlx5_fpga_device_stop(dev);
-
err_fpga_start:
mlx5_fw_tracer_cleanup(dev->tracer);
-
err_fw_tracer:
mlx5_eq_table_destroy(dev);
-
err_eq_table:
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
- mlx5_put_uars_page(dev, priv->uar);
+ mlx5_put_uars_page(dev, dev->priv.uar);
+ return err;
+}
-err_get_uars:
- if (boot)
- mlx5_cleanup_once(dev);
+static void mlx5_unload(struct mlx5_core_dev *dev) /* tears down what mlx5_load() set up; called by mlx5_unload_one() and by mlx5_load_one()'s err_reg_dev path */
+{
+ mlx5_ec_cleanup(dev);
+ mlx5_sriov_detach(dev);
+ mlx5_cleanup_fs(dev);
+ mlx5_accel_ipsec_cleanup(dev); /* NOTE(review): mlx5_load()'s error labels clean up TLS before IPSec; */
+ mlx5_accel_tls_cleanup(dev); /* here IPSec goes first, as in the code this was moved from - confirm the order is intended */
+ mlx5_fpga_device_stop(dev);
+ mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_eq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
+ mlx5_put_uars_page(dev, dev->priv.uar); /* releases the UAR page obtained with mlx5_get_uars_page() in mlx5_load() */
+}
-err_stop_poll:
- mlx5_stop_health_poll(dev, boot);
- if (mlx5_cmd_teardown_hca(dev)) {
- dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
- goto out_err;
+/*
+ * Bring the device up under intf_state_mutex: function setup, one-time SW
+ * object init (only when @boot), mlx5_load(), then attach or register the
+ * device and set MLX5_INTERFACE_STATE_UP.
+ *
+ * Returns 0 on success, or when the interface is already up (NOP). On any
+ * failure the completed steps are undone in reverse order, dev->state is
+ * set to MLX5_DEVICE_STATE_INTERNAL_ERROR and the error is returned.
+ */
+static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
+{
+ int err = 0;
+
+ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "interface is up, NOP\n");
+ goto out;
}
+ /* remove any previous indication of internal error */
+ dev->state = MLX5_DEVICE_STATE_UP;
-reclaim_boot_pages:
- mlx5_reclaim_startup_pages(dev);
+ err = mlx5_function_setup(dev, boot);
+ /* nothing to undo yet, but the device must not stay marked as UP */
+ if (err)
+ goto err_function;
-err_disable_hca:
- mlx5_core_disable_hca(dev, 0);
+ if (boot) {
+ err = mlx5_init_once(dev);
+ if (err) {
+ mlx5_core_err(dev, "sw objs init failed\n");
+ goto function_teardown;
+ }
+ }
-err_cmd_cleanup:
- mlx5_cmd_cleanup(dev);
+ err = mlx5_load(dev);
+ if (err)
+ goto err_load;
-out_err:
+ if (mlx5_device_registered(dev)) {
+ mlx5_attach_device(dev);
+ } else {
+ err = mlx5_register_device(dev);
+ if (err) {
+ mlx5_core_err(dev, "register device failed %d\n", err);
+ goto err_reg_dev;
+ }
+ }
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+
+ return err;
+
+err_reg_dev:
+ mlx5_unload(dev);
+err_load:
+ if (boot)
+ mlx5_cleanup_once(dev);
+function_teardown:
+ mlx5_function_teardown(dev, boot);
+err_function:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mutex_unlock(&dev->intf_state_mutex);
return err;
}
-static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
- bool cleanup)
+static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
{
int err = 0;
mutex_lock(&dev->intf_state_mutex);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
- __func__);
+ mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+ __func__);
if (cleanup)
mlx5_cleanup_once(dev);
goto out;
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
- mlx5_ec_cleanup(dev);
- mlx5_sriov_detach(dev);
- mlx5_cleanup_fs(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_accel_tls_cleanup(dev);
- mlx5_fpga_device_stop(dev);
- mlx5_fw_tracer_cleanup(dev->tracer);
- mlx5_eq_table_destroy(dev);
- mlx5_pagealloc_stop(dev);
- mlx5_events_stop(dev);
- mlx5_put_uars_page(dev, priv->uar);
+ mlx5_unload(dev);
+
if (cleanup)
mlx5_cleanup_once(dev);
- mlx5_stop_health_poll(dev, cleanup);
-
- err = mlx5_cmd_teardown_hca(dev);
- if (err) {
- dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
- goto out;
- }
- mlx5_reclaim_startup_pages(dev);
- mlx5_core_disable_hca(dev, 0);
- mlx5_cmd_cleanup(dev);
+ mlx5_function_teardown(dev, cleanup);
out:
mutex_unlock(&dev->intf_state_mutex);
return err;
#endif
};
-#define MLX5_IB_MOD "mlx5_ib"
-static int init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+/* Initialize the parts of a core device that this function can set up from
+ * @dev, @profile_idx and @name alone.
+ *
+ * Copies @name into priv->name (at most MLX5_MAX_NAME_LEN - 1 characters plus
+ * a terminating 0), selects profile[@profile_idx], initializes the lists,
+ * spinlocks and mutexes in dev->priv, creates a debugfs directory called
+ * @name under mlx5_debugfs_root, and then calls mlx5_health_init() and
+ * mlx5_pagealloc_init().
+ *
+ * Returns 0 on success, -ENOMEM when the debugfs directory pointer is NULL,
+ * or the error from the failing init call after undoing the earlier steps.
+ *
+ * NOTE(review): the failure path removes the debugfs directory with
+ * debugfs_remove() while mlx5_mdev_uninit() uses debugfs_remove_recursive();
+ * confirm nothing has been created below the directory by that point.
+ */
+static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx, const char *name)
{
- struct mlx5_core_dev *dev;
- struct devlink *devlink;
- struct mlx5_priv *priv;
+ struct mlx5_priv *priv = &dev->priv;
int err;
- devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
- if (!devlink) {
- dev_err(&pdev->dev, "kzalloc failed\n");
- return -ENOMEM;
- }
-
- dev = devlink_priv(devlink);
- priv = &dev->priv;
- priv->pci_dev_data = id->driver_data;
-
- pci_set_drvdata(pdev, dev);
+ /* strncpy() does not terminate on truncation, hence the explicit 0 */
+ strncpy(priv->name, name, MLX5_MAX_NAME_LEN);
+ priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
- dev->pdev = pdev;
- dev->profile = &profile[prof_sel];
+ dev->profile = &profile[profile_idx];
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
- err = mlx5_pci_init(dev, priv);
- if (err) {
- dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
- goto clean_dev;
+ mutex_init(&priv->alloc_mutex);
+ mutex_init(&priv->pgdir_mutex);
+ INIT_LIST_HEAD(&priv->pgdir_list);
+ spin_lock_init(&priv->mkey_lock);
+
+ /* NOTE(review): only a NULL return is treated as failure here; confirm
+ * this matches what debugfs_create_dir() returns on error in the
+ * target kernel.
+ */
+ priv->dbg_root = debugfs_create_dir(name, mlx5_debugfs_root);
+ if (!priv->dbg_root) {
+ pr_err("mlx5_core: %s error, Cannot create debugfs dir, aborting\n", name);
+ return -ENOMEM;
}
err = mlx5_health_init(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
- goto close_pci;
- }
+ if (err)
+ goto err_health_init;
err = mlx5_pagealloc_init(dev);
if (err)
goto err_pagealloc_init;
- err = mlx5_load_one(dev, priv, true);
+ return 0;
+
+err_pagealloc_init:
+ mlx5_health_cleanup(dev);
+err_health_init:
+ debugfs_remove(dev->priv.dbg_root);
+
+ return err;
+}
+
+/* Undo mlx5_mdev_init(): page allocator cleanup, health cleanup, then
+ * recursive removal of the device's debugfs directory (the reverse of the
+ * order in which mlx5_mdev_init() set them up).
+ */
+static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+{
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+ debugfs_remove_recursive(dev->priv.dbg_root);
+}
+
+#define MLX5_IB_MOD "mlx5_ib"
+static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mlx5_core_dev *dev;
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
+ if (!devlink) {
+ dev_err(&pdev->dev, "kzalloc failed\n");
+ return -ENOMEM;
+ }
+
+ dev = devlink_priv(devlink);
+
+ err = mlx5_mdev_init(dev, prof_sel, dev_name(&pdev->dev));
+ if (err)
+ goto mdev_init_err;
+
+ err = mlx5_pci_init(dev, pdev, id);
+ if (err) {
+ mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
+ err);
+ goto pci_init_err;
+ }
+
+ err = mlx5_load_one(dev, true);
if (err) {
- dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
+ mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
+ err);
goto err_load_one;
}
return 0;
clean_load:
- mlx5_unload_one(dev, priv, true);
+ mlx5_unload_one(dev, true);
+
err_load_one:
- mlx5_pagealloc_cleanup(dev);
-err_pagealloc_init:
- mlx5_health_cleanup(dev);
-close_pci:
- mlx5_pci_close(dev, priv);
-clean_dev:
+ mlx5_pci_close(dev);
+pci_init_err:
+ mlx5_mdev_uninit(dev);
+mdev_init_err:
devlink_free(devlink);
return err;
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
- struct mlx5_priv *priv = &dev->priv;
devlink_unregister(devlink);
mlx5_unregister_device(dev);
- if (mlx5_unload_one(dev, priv, true)) {
- dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
- mlx5_health_cleanup(dev);
+ if (mlx5_unload_one(dev, true)) {
+ mlx5_core_err(dev, "mlx5_unload_one failed\n");
+ mlx5_health_flush(dev);
return;
}
- mlx5_pagealloc_cleanup(dev);
- mlx5_health_cleanup(dev);
- mlx5_pci_close(dev, priv);
+ mlx5_pci_close(dev);
+ mlx5_mdev_uninit(dev);
devlink_free(devlink);
}
pci_channel_state_t state)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- struct mlx5_priv *priv = &dev->priv;
- dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_core_info(dev, "%s was called\n", __func__);
mlx5_enter_error_state(dev, false);
- mlx5_unload_one(dev, priv, false);
+ mlx5_unload_one(dev, false);
/* In case of kernel call drain the health wq */
if (state) {
mlx5_drain_health_wq(dev);
count = ioread32be(health->health_counter);
if (count && count != 0xffffffff) {
if (last_count && last_count != count) {
- dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+ mlx5_core_info(dev,
+ "wait vital counter value 0x%x after %d iterations\n",
+ count, i);
return 0;
}
last_count = count;
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_core_info(dev, "%s was called\n", __func__);
err = mlx5_pci_enable_device(dev);
if (err) {
- dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
- , __func__, err);
+ mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
+ __func__, err);
return PCI_ERS_RESULT_DISCONNECT;
}
pci_save_state(pdev);
if (wait_vital(pdev)) {
- dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+ mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
return PCI_ERS_RESULT_DISCONNECT;
}
+/* Reload the device after a PCI error: calls mlx5_load_one(dev, false) and
+ * logs either the error code or "device recovered". Nothing is returned to
+ * the caller. Presumably the .resume hook of mlx5_err_handler - confirm.
+ */
static void mlx5_pci_resume(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- struct mlx5_priv *priv = &dev->priv;
int err;
- dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_core_info(dev, "%s was called\n", __func__);
- err = mlx5_load_one(dev, priv, false);
+ err = mlx5_load_one(dev, false);
if (err)
- dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
- , __func__, err);
+ mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
+ __func__, err);
else
- dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+ mlx5_core_info(dev, "%s: device recovered\n", __func__);
}
static const struct pci_error_handlers mlx5_err_handler = {
+/* Shutdown handler for the PCI device: tries mlx5_try_fast_unload() first and
+ * falls back to mlx5_unload_one(dev, false) when that returns non-zero, then
+ * disables the PCI device in either case.
+ */
static void shutdown(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- struct mlx5_priv *priv = &dev->priv;
int err;
- dev_info(&pdev->dev, "Shutdown was called\n");
+ mlx5_core_info(dev, "Shutdown was called\n");
err = mlx5_try_fast_unload(dev);
if (err)
- mlx5_unload_one(dev, priv, false);
+ mlx5_unload_one(dev, false);
mlx5_pci_disable_device(dev);
}
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
- dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_debug("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_dbg_once(__dev, format, ...) \
- dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_debug_once("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
} while (0)
#define mlx5_core_err(__dev, format, ...) \
- dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_err("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
-#define mlx5_core_err_rl(__dev, format, ...) \
- dev_err_ratelimited(&(__dev)->pdev->dev, \
- "%s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, \
+#define mlx5_core_err_rl(__dev, format, ...) \
+ pr_err_ratelimited("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
+ __func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_warn(__dev, format, ...) \
- dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_warn("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_warn_once(__dev, format, ...) \
- dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_warn_once("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+/* Rate-limited warning (pr_warn_ratelimited) prefixed with the device name
+ * from (__dev)->priv.name, the calling function, the line and current->pid.
+ */
+#define mlx5_core_warn_rl(__dev, format, ...) \
+ pr_warn_ratelimited("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+/* Info-level message prefixed only with the device name from
+ * (__dev)->priv.name; unlike the err/warn helpers it adds no
+ * function/line/pid prefix.
+ */
#define mlx5_core_info(__dev, format, ...) \
- dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
+ pr_info("%s " format, (__dev)->priv.name, ##__VA_ARGS__)
+
+/* Rate-limited info message (pr_info_ratelimited) prefixed with the device
+ * name from (__dev)->priv.name, the calling function, the line and
+ * current->pid.
+ */
+#define mlx5_core_info_rl(__dev, format, ...) \
+ pr_info_ratelimited("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
enum {
MLX5_CMD_DATA, /* print command payload only */
size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
i2c_addr = MLX5_I2C_ADDR_LOW;
- if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
- i2c_addr = MLX5_I2C_ADDR_HIGH;
- offset -= MLX5_EEPROM_PAGE_LENGTH;
- }
MLX5_SET(mcia_reg, in, l, 0);
MLX5_SET(mcia_reg, in, module, module_num);
else
system_page_index = index;
- return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+ return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index;
}
static void up_rel_func(struct kref *kref)
*wq->db = cpu_to_be32(wq->wqe_ctr);
}
+/* Return @ctr shifted right by the queue's fbc.log_sz.
+ * Presumably this is how many times the counter has wrapped the cyclic
+ * queue - confirm that the queue size is 1 << fbc.log_sz.
+ */
+static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr)
+{
+ u16 wrap_cnt = ctr >> wq->fbc.log_sz;
+
+ return wrap_cnt;
+}
+
static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
{
return ctr & wq->fbc.sz_m1;
return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
}
+/* Look up the WQE at index @ix and return its next_wqe_index field converted
+ * from big-endian to CPU byte order.
+ */
+static inline u16 mlx5_wq_ll_get_wqe_next_ix(struct mlx5_wq_ll *wq, u16 ix)
+{
+ struct mlx5_wqe_srq_next_seg *next_seg = mlx5_wq_ll_get_wqe(wq, ix);
+ u16 next_ix = be16_to_cpu(next_seg->next_wqe_index);
+
+ return next_ix;
+}
+
static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next)
{
wq->head = head_next;
spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
spectrum_mr_tcam.o spectrum_mr.o \
spectrum_qdisc.o spectrum_span.o \
- spectrum_nve.o spectrum_nve_vxlan.o
+ spectrum_nve.o spectrum_nve_vxlan.o \
+ spectrum_dpipe.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
-mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
mlxsw_minimal-objs := minimal.o
if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
return 0;
- emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0);
+ emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0);
if (!emad_wq)
return -ENOMEM;
mlxsw_core->emad_wq = emad_wq;
static int
mlxsw_devlink_sb_pool_set(struct devlink *devlink,
unsigned int sb_index, u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type)
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
if (!mlxsw_driver->sb_pool_set)
return -EOPNOTSUPP;
return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index,
- pool_index, size, threshold_type);
+ pool_index, size, threshold_type,
+ extack);
}
static void *__dl_port(struct devlink_port *devlink_port)
static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold)
+ u32 threshold,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
!mlxsw_core_port_check(mlxsw_core_port))
return -EOPNOTSUPP;
return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index,
- pool_index, threshold);
+ pool_index, threshold, extack);
}
static int
mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold)
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
return -EOPNOTSUPP;
return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index,
tc_index, pool_type,
- pool_index, threshold);
+ pool_index, threshold, extack);
}
static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink,
pool_type, p_cur, p_max);
}
+/* devlink info_get callback.
+ *
+ * Reports the driver name (bus_info->device_kind), then queries the MGIR
+ * register and reports "hw.revision" and "fw.psid" as fixed versions and
+ * "fw.version" (major.minor.sub_minor) as the running version.
+ *
+ * @extack is not used. Returns 0 on success or the first error returned by
+ * the register query or by one of the devlink put helpers.
+ */
+static int
+mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ /* One extra zeroed byte: the PSID is handed on as a C string, so it
+ * must stay NUL-terminated even if all PSID bytes are in use.
+ */
+ char fw_info_psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE + 1] = { 0 };
+ u32 hw_rev, fw_major, fw_minor, fw_sub_minor;
+ char mgir_pl[MLXSW_REG_MGIR_LEN];
+ char buf[32];
+ int err;
+
+ err = devlink_info_driver_name_put(req,
+ mlxsw_core->bus_info->device_kind);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgir_pack(mgir_pl);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mgir_unpack(mgir_pl, &hw_rev, fw_info_psid, &fw_major,
+ &fw_minor, &fw_sub_minor);
+
+ snprintf(buf, sizeof(buf), "%X", hw_rev);
+ err = devlink_info_version_fixed_put(req, "hw.revision", buf);
+ if (err)
+ return err;
+
+ err = devlink_info_version_fixed_put(req, "fw.psid", fw_info_psid);
+ if (err)
+ return err;
+
+ /* The values are u32, so print them unsigned; snprintf() bounds the
+ * write because three full-width fields do not fit in buf.
+ */
+ snprintf(buf, sizeof(buf), "%u.%u.%u", fw_major, fw_minor,
+ fw_sub_minor);
+
+ return devlink_info_version_running_put(req, "fw.version", buf);
+}
+
static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
.sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get,
+ .info_get = mlxsw_devlink_info_get,
};
static int
{
int err;
- mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0);
+ mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
if (!mlxsw_wq)
return -ENOMEM;
- mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM,
+ mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
mlxsw_core_driver_name);
if (!mlxsw_owq) {
err = -ENOMEM;
struct devlink_sb_pool_info *pool_info);
int (*sb_pool_set)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type);
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack);
int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold);
+ u32 threshold, struct netlink_ext_ack *extack);
int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold);
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack);
int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core,
#define MLXSW_PCI_SW_RESET 0xF0010
#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000
#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
mlxsw_reg_mpar_pa_id_set(payload, pa_id);
}
+/* MGIR - Management General Information Register
+ * ----------------------------------------------
+ * MGIR register allows software to query the hardware and firmware general
+ * information.
+ */
+#define MLXSW_REG_MGIR_ID 0x9020
+#define MLXSW_REG_MGIR_LEN 0x9C
+
+MLXSW_REG_DEFINE(mgir, MLXSW_REG_MGIR_ID, MLXSW_REG_MGIR_LEN);
+
+/* reg_mgir_hw_info_device_hw_revision
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, hw_info_device_hw_revision, 0x0, 16, 16);
+
+#define MLXSW_REG_MGIR_FW_INFO_PSID_SIZE 16
+
+/* reg_mgir_fw_info_psid
+ * PSID (ASCII string).
+ * Access: RO
+ */
+MLXSW_ITEM_BUF(reg, mgir, fw_info_psid, 0x30, MLXSW_REG_MGIR_FW_INFO_PSID_SIZE);
+
+/* reg_mgir_fw_info_extended_major
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_major, 0x44, 0, 32);
+
+/* reg_mgir_fw_info_extended_minor
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_minor, 0x48, 0, 32);
+
+/* reg_mgir_fw_info_extended_sub_minor
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_sub_minor, 0x4C, 0, 32);
+
+/* Prepare an MGIR query payload: applies MLXSW_REG_ZERO() to @payload and
+ * sets no other fields.
+ */
+static inline void mlxsw_reg_mgir_pack(char *payload)
+{
+ MLXSW_REG_ZERO(mgir, payload);
+}
+
+/* Extract the MGIR fields from @payload into the caller's outputs:
+ * @hw_rev gets hw_info_device_hw_revision, @fw_major/@fw_minor/@fw_sub_minor
+ * get the fw_info_extended_* fields, and the PSID bytes are copied into
+ * @fw_info_psid.
+ *
+ * NOTE(review): @fw_info_psid presumably needs room for
+ * MLXSW_REG_MGIR_FW_INFO_PSID_SIZE bytes and is not NUL-terminated here -
+ * confirm against mlxsw_reg_mgir_fw_info_psid_memcpy_from().
+ */
+static inline void
+mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,
+ u32 *fw_major, u32 *fw_minor, u32 *fw_sub_minor)
+{
+ *hw_rev = mlxsw_reg_mgir_hw_info_device_hw_revision_get(payload);
+ mlxsw_reg_mgir_fw_info_psid_memcpy_from(payload, fw_info_psid);
+ *fw_major = mlxsw_reg_mgir_fw_info_extended_major_get(payload);
+ *fw_minor = mlxsw_reg_mgir_fw_info_extended_minor_get(payload);
+ *fw_sub_minor = mlxsw_reg_mgir_fw_info_extended_sub_minor_get(payload);
+}
+
/* MRSR - Management Reset and Shutdown Register
* ---------------------------------------------
* MRSR register is used to reset or shutdown the switch or
MLXSW_REG(mcia),
MLXSW_REG(mpat),
MLXSW_REG(mpar),
+ MLXSW_REG(mgir),
MLXSW_REG(mrsr),
MLXSW_REG(mlcr),
MLXSW_REG(mpsc),
#include <linux/dcbnl.h>
#include <linux/inetdevice.h>
#include <linux/netlink.h>
-#include <linux/random.h>
+#include <linux/jhash.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
if (err)
return err;
+ mlxsw_sp_port->link.autoneg = autoneg;
+
if (!netif_running(dev))
return 0;
- mlxsw_sp_port->link.autoneg = autoneg;
-
mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
- false, 0);
+ true, 100);
if (err)
return err;
}
u32 seed;
int err;
- get_random_bytes(&seed, sizeof(seed));
+ seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
MLXSW_REG_SLCR_LAG_HASH_DMAC |
MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
struct devlink_sb_pool_info *pool_info);
int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type);
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack);
int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold);
+ u32 threshold, struct netlink_ext_ack *extack);
int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold);
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack);
int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
#include <linux/dcbnl.h>
#include <linux/if_ether.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include "spectrum.h"
#include "core.h"
struct mlxsw_sp_sb_pr {
enum mlxsw_reg_sbpr_mode mode;
u32 size;
+ u8 freeze_mode:1,
+ freeze_size:1;
};
struct mlxsw_cp_sb_occ {
u32 max_buff;
u16 pool_index;
struct mlxsw_cp_sb_occ occ;
+ u8 freeze_pool:1,
+ freeze_thresh:1;
};
#define MLXSW_SP_SB_INFI -1U
u8 pool;
};
-/* Order ingress pools before egress pools. */
+#define MLXSW_SP_SB_POOL_ING 0
+#define MLXSW_SP_SB_POOL_EGR 4
+#define MLXSW_SP_SB_POOL_EGR_MC 8
+#define MLXSW_SP_SB_POOL_ING_CPU 9
+#define MLXSW_SP_SB_POOL_EGR_CPU 10
+
static const struct mlxsw_sp_sb_pool_des mlxsw_sp1_sb_pool_dess[] = {
{MLXSW_REG_SBXX_DIR_INGRESS, 0},
{MLXSW_REG_SBXX_DIR_INGRESS, 1},
{MLXSW_REG_SBXX_DIR_EGRESS, 2},
{MLXSW_REG_SBXX_DIR_EGRESS, 3},
{MLXSW_REG_SBXX_DIR_EGRESS, 15},
+ {MLXSW_REG_SBXX_DIR_INGRESS, 4},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 4},
};
static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = {
{MLXSW_REG_SBXX_DIR_EGRESS, 1},
{MLXSW_REG_SBXX_DIR_EGRESS, 2},
{MLXSW_REG_SBXX_DIR_EGRESS, 3},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 15},
+ {MLXSW_REG_SBXX_DIR_INGRESS, 4},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 4},
};
#define MLXSW_SP_SB_ING_TC_COUNT 8
unsigned int pool_count;
const struct mlxsw_sp_sb_pool_des *pool_dess;
const struct mlxsw_sp_sb_pm *pms;
+ const struct mlxsw_sp_sb_pm *pms_cpu;
const struct mlxsw_sp_sb_pr *prs;
const struct mlxsw_sp_sb_mm *mms;
const struct mlxsw_sp_sb_cm *cms_ingress;
{
const u32 pbs[] = {
[0] = MLXSW_SP_PB_HEADROOM * mlxsw_sp_port->mapping.width,
- [9] = 2 * MLXSW_PORT_MAX_MTU,
+ [9] = MLXSW_PORT_MAX_MTU,
};
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char pbmc_pl[MLXSW_REG_PBMC_LEN];
.size = _size, \
}
+/* Initializer for a struct mlxsw_sp_sb_pr entry that also sets the
+ * freeze_mode and freeze_size bits in addition to mode and size.
+ * Presumably a set freeze bit marks that attribute as not changeable by the
+ * user - confirm against the pool-set path.
+ */
+#define MLXSW_SP_SB_PR_EXT(_mode, _size, _freeze_mode, _freeze_size) \
+ { \
+ .mode = _mode, \
+ .size = _size, \
+ .freeze_mode = _freeze_mode, \
+ .freeze_size = _freeze_size, \
+ }
+
#define MLXSW_SP1_SB_PR_INGRESS_SIZE 12440000
-#define MLXSW_SP1_SB_PR_INGRESS_MNG_SIZE (200 * 1000)
#define MLXSW_SP1_SB_PR_EGRESS_SIZE 13232000
+#define MLXSW_SP1_SB_PR_CPU_SIZE (256 * 1000)
+/* Order according to mlxsw_sp1_sb_pool_dess */
static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
- /* Ingress pools. */
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
MLXSW_SP1_SB_PR_INGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
- MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
- MLXSW_SP1_SB_PR_INGRESS_MNG_SIZE),
- /* Egress pools. */
- MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
- MLXSW_SP1_SB_PR_EGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP1_SB_PR_EGRESS_SIZE, true, false),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
- MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI,
+ true, true),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
};
#define MLXSW_SP2_SB_PR_INGRESS_SIZE 40960000
-#define MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE (200 * 1000)
#define MLXSW_SP2_SB_PR_EGRESS_SIZE 40960000
+#define MLXSW_SP2_SB_PR_CPU_SIZE (256 * 1000)
+/* Order according to mlxsw_sp2_sb_pool_dess */
static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
- /* Ingress pools. */
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
MLXSW_SP2_SB_PR_INGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
- MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
- MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE),
- /* Egress pools. */
- MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
- MLXSW_SP2_SB_PR_EGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP2_SB_PR_EGRESS_SIZE, true, false),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI,
+ true, true),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP2_SB_PR_CPU_SIZE, true, false),
+ MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP2_SB_PR_CPU_SIZE, true, false),
};
static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
.pool_index = _pool, \
}
+/* Initializer for a struct mlxsw_sp_sb_cm entry with the given min/max
+ * buffer values, bound to pool index MLXSW_SP_SB_POOL_ING.
+ */
+#define MLXSW_SP_SB_CM_ING(_min_buff, _max_buff) \
+ { \
+ .min_buff = _min_buff, \
+ .max_buff = _max_buff, \
+ .pool_index = MLXSW_SP_SB_POOL_ING, \
+ }
+
+/* Initializer for a struct mlxsw_sp_sb_cm entry with the given min/max
+ * buffer values, bound to pool index MLXSW_SP_SB_POOL_EGR.
+ */
+#define MLXSW_SP_SB_CM_EGR(_min_buff, _max_buff) \
+ { \
+ .min_buff = _min_buff, \
+ .max_buff = _max_buff, \
+ .pool_index = MLXSW_SP_SB_POOL_EGR, \
+ }
+
+/* Initializer for a struct mlxsw_sp_sb_cm entry with the given min/max
+ * buffer values, bound to pool index MLXSW_SP_SB_POOL_EGR_MC, with both
+ * freeze_pool and freeze_thresh set.
+ * Presumably the freeze bits stop the user from changing the pool binding
+ * and threshold - confirm against the tc-pool-bind-set path.
+ */
+#define MLXSW_SP_SB_CM_EGR_MC(_min_buff, _max_buff) \
+ { \
+ .min_buff = _min_buff, \
+ .max_buff = _max_buff, \
+ .pool_index = MLXSW_SP_SB_POOL_EGR_MC, \
+ .freeze_pool = true, \
+ .freeze_thresh = true, \
+ }
+
static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_ingress[] = {
- MLXSW_SP_SB_CM(10000, 8, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */
- MLXSW_SP_SB_CM(20000, 1, 3),
+ MLXSW_SP_SB_CM_ING(10000, 8),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, 0), /* dummy, this PG does not exist */
+ MLXSW_SP_SB_CM(10000, 8, MLXSW_SP_SB_POOL_ING_CPU),
};
static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_ingress[] = {
- MLXSW_SP_SB_CM(0, 7, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
- MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */
- MLXSW_SP_SB_CM(20000, 1, 3),
+ MLXSW_SP_SB_CM_ING(0, 7),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_CM_ING(0, 0), /* dummy, this PG does not exist */
+ MLXSW_SP_SB_CM(10000, 8, MLXSW_SP_SB_POOL_ING_CPU),
};
static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_egress[] = {
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(1500, 9, 4),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
- MLXSW_SP_SB_CM(1, 0xff, 4),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR(1500, 9),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR(1, 0xff),
};
static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = {
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(1, 0xff, 4),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR(0, 7),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+ MLXSW_SP_SB_CM_EGR(1, 0xff),
};
-#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4)
+#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, MLXSW_SP_SB_POOL_EGR_CPU)
static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
+ MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+ MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+ MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+ MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+ MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
+ MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
.max_buff = _max_buff, \
}
+/* Order according to mlxsw_sp1_sb_pool_dess */
static const struct mlxsw_sp_sb_pm mlxsw_sp1_sb_pms[] = {
- /* Ingress pools. */
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
- MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
- /* Egress pools. */
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, 7),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(10000, 90000),
+ MLXSW_SP_SB_PM(0, 8), /* 50% occupancy */
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
};
+/* Order according to mlxsw_sp2_sb_pool_dess */
static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = {
- /* Ingress pools. */
MLXSW_SP_SB_PM(0, 7),
MLXSW_SP_SB_PM(0, 0),
MLXSW_SP_SB_PM(0, 0),
- MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
- /* Egress pools. */
+ MLXSW_SP_SB_PM(0, 0),
MLXSW_SP_SB_PM(0, 7),
MLXSW_SP_SB_PM(0, 0),
MLXSW_SP_SB_PM(0, 0),
MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(10000, 90000),
+ MLXSW_SP_SB_PM(0, 8), /* 50% occupancy */
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
};
-static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
+/* Order according to mlxsw_sp*_sb_pool_dess */
+static const struct mlxsw_sp_sb_pm mlxsw_sp_cpu_port_sb_pms[] = {
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, 90000),
+ MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
+};
+
+static int mlxsw_sp_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ const struct mlxsw_sp_sb_pm *pms,
+ bool skip_ingress)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- int i;
- int err;
+ int i, err;
for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
- const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp->sb_vals->pms[i];
+ const struct mlxsw_sp_sb_pm *pm = &pms[i];
+ const struct mlxsw_sp_sb_pool_des *des;
u32 max_buff;
u32 min_buff;
+ des = &mlxsw_sp->sb_vals->pool_dess[i];
+ if (skip_ingress && des->dir == MLXSW_REG_SBXX_DIR_INGRESS)
+ continue;
+
min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff);
max_buff = pm->max_buff;
if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i))
max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, max_buff);
- err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port,
- i, min_buff, max_buff);
+ err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, min_buff,
+ max_buff);
if (err)
return err;
}
return 0;
}
-#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \
+/* Program the pool quotas of a front-panel port from the sb_vals->pms
+ * table; no pool is skipped (skip_ingress is false).
+ */
+static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ const struct mlxsw_sp_sb_pm *port_pms;
+ u8 port = mlxsw_sp_port->local_port;
+
+ port_pms = mlxsw_sp_port->mlxsw_sp->sb_vals->pms;
+ return mlxsw_sp_sb_pms_init(mlxsw_sp_port->mlxsw_sp, port, port_pms,
+ false);
+}
+
+/* Program the pool quotas from the sb_vals->pms_cpu table for local port 0,
+ * skipping ingress pools (skip_ingress is true).
+ */
+static int mlxsw_sp_cpu_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_sp_sb_pm *cpu_pms = mlxsw_sp->sb_vals->pms_cpu;
+
+ return mlxsw_sp_sb_pms_init(mlxsw_sp, 0, cpu_pms, true);
+}
+
+#define MLXSW_SP_SB_MM(_min_buff, _max_buff) \
{ \
.min_buff = _min_buff, \
.max_buff = _max_buff, \
- .pool_index = _pool, \
+ .pool_index = MLXSW_SP_SB_POOL_EGR, \
}
static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
- MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
+ MLXSW_SP_SB_MM(0, 6),
};
static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
{
int i;
- for (i = 0; i < mlxsw_sp->sb_vals->pool_count; ++i)
+ for (i = 0; i < mlxsw_sp->sb_vals->pool_count; ++i) {
if (mlxsw_sp->sb_vals->pool_dess[i].dir ==
- MLXSW_REG_SBXX_DIR_EGRESS)
- goto out;
- WARN(1, "No egress pools\n");
+ MLXSW_REG_SBXX_DIR_INGRESS)
+ (*p_ingress_len)++;
+ else
+ (*p_egress_len)++;
+ }
-out:
- *p_ingress_len = i;
- *p_egress_len = mlxsw_sp->sb_vals->pool_count - i;
+ WARN(*p_egress_len == 0, "No egress pools\n");
}
const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = {
.pool_count = ARRAY_SIZE(mlxsw_sp1_sb_pool_dess),
.pool_dess = mlxsw_sp1_sb_pool_dess,
.pms = mlxsw_sp1_sb_pms,
+ .pms_cpu = mlxsw_sp_cpu_port_sb_pms,
.prs = mlxsw_sp1_sb_prs,
.mms = mlxsw_sp_sb_mms,
.cms_ingress = mlxsw_sp1_sb_cms_ingress,
.pool_count = ARRAY_SIZE(mlxsw_sp2_sb_pool_dess),
.pool_dess = mlxsw_sp2_sb_pool_dess,
.pms = mlxsw_sp2_sb_pms,
+ .pms_cpu = mlxsw_sp_cpu_port_sb_pms,
.prs = mlxsw_sp2_sb_prs,
.mms = mlxsw_sp_sb_mms,
.cms_ingress = mlxsw_sp2_sb_cms_ingress,
int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
{
u32 max_headroom_size;
- u16 ing_pool_count;
- u16 eg_pool_count;
+ u16 ing_pool_count = 0;
+ u16 eg_pool_count = 0;
int err;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp);
if (err)
goto err_sb_cpu_port_sb_cms_init;
+ err = mlxsw_sp_cpu_port_sb_pms_init(mlxsw_sp);
+ if (err)
+ goto err_sb_cpu_port_pms_init;
err = mlxsw_sp_sb_mms_init(mlxsw_sp);
if (err)
goto err_sb_mms_init;
err_devlink_sb_register:
err_sb_mms_init:
+err_sb_cpu_port_pms_init:
err_sb_cpu_port_sb_cms_init:
err_sb_prs_init:
mlxsw_sp_sb_ports_fini(mlxsw_sp);
int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type)
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size);
+ const struct mlxsw_sp_sb_pr *pr;
enum mlxsw_reg_sbpr_mode mode;
- if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
+ mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
+ pr = &mlxsw_sp->sb_vals->prs[pool_index];
+
+ if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded shared buffer size");
return -EINVAL;
+ }
+
+ /* A pool flagged freeze_mode must keep the mode from sb_vals->prs. */
+ if (pr->freeze_mode && pr->mode != mode) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing this pool's threshold type is forbidden");
+ return -EINVAL;
+ }
+
+ /* A pool flagged freeze_size must keep the size from sb_vals->prs. */
+ if (pr->freeze_size && pr->size != size) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing this pool's size is forbidden");
+ return -EINVAL;
+ }
- mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode,
pool_size, false);
}
}
static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
- u32 threshold, u32 *p_max_buff)
+ u32 threshold, u32 *p_max_buff,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN ||
- val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX)
+ val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid dynamic threshold value");
return -EINVAL;
+ }
*p_max_buff = val;
} else {
*p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold);
int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold)
+ u32 threshold, struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port *mlxsw_sp_port =
mlxsw_core_port_driver_priv(mlxsw_core_port);
int err;
err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
- threshold, &max_buff);
+ threshold, &max_buff, extack);
if (err)
return err;
int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold)
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port *mlxsw_sp_port =
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
+ const struct mlxsw_sp_sb_cm *cm;
u8 pg_buff = tc_index;
enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
u32 max_buff;
int err;
- if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir)
+ if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) {
+ NL_SET_ERR_MSG_MOD(extack, "Binding egress TC to ingress pool and vice versa is forbidden");
return -EINVAL;
+ }
+
+ if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
+ cm = &mlxsw_sp->sb_vals->cms_ingress[tc_index];
+ else
+ cm = &mlxsw_sp->sb_vals->cms_egress[tc_index];
+
+ if (cm->freeze_pool && cm->pool_index != pool_index) {
+ NL_SET_ERR_MSG_MOD(extack, "Binding this TC to a different pool is forbidden");
+ return -EINVAL;
+ }
+
+ if (cm->freeze_thresh && cm->max_buff != threshold) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing this TC's threshold is forbidden");
+ return -EINVAL;
+ }
err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
- threshold, &max_buff);
+ threshold, &max_buff, extack);
if (err)
return err;
#ifndef _MLXSW_PIPELINE_H_
#define _MLXSW_PIPELINE_H_
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
-
int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp);
-#else
-
-static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
-{
- return 0;
-}
-
-static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
-{
-}
-
-#endif
-
#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4"
#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6"
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
-#include <linux/random.h>
#include <linux/if_macvlan.h>
#include <linux/refcount.h>
+#include <linux/jhash.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}
-static void
+static int
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
enum mlxsw_reg_rauht_op op)
if (neigh_entry->counter_valid)
mlxsw_reg_rauht_pack_counter(rauht_pl,
neigh_entry->counter_index);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
-static void
+static int
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
enum mlxsw_reg_rauht_op op)
if (neigh_entry->counter_valid)
mlxsw_reg_rauht_pack_counter(rauht_pl,
neigh_entry->counter_index);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
struct mlxsw_sp_neigh_entry *neigh_entry,
bool adding)
{
+ enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
+ int err;
+
if (!adding && !neigh_entry->connected)
return;
neigh_entry->connected = adding;
if (neigh_entry->key.n->tbl->family == AF_INET) {
- mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
- mlxsw_sp_rauht_op(adding));
+ err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
+ op);
+ if (err)
+ return;
} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
return;
- mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
- mlxsw_sp_rauht_op(adding));
+ err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
+ op);
+ if (err)
+ return;
} else {
WARN_ON_ONCE(1);
+ return;
}
+
+ if (adding)
+ neigh_entry->key.n->flags |= NTF_OFFLOADED;
+ else
+ neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
}
void
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
- return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_has_gw;
+ return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
}
static struct fib6_info *
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
- return rt->fib6_nh.fib_nh_has_gw ||
+ return rt->fib6_nh.fib_nh_gw_family ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
fr_info = container_of(info, struct fib_rule_notifier_info, info);
rule = fr_info->rule;
+ /* Rule only affects locally generated traffic */
+ if (rule->iifindex == info->net->loopback_dev->ifindex)
+ return 0;
+
switch (info->family) {
case AF_INET:
if (!fib4_rule_default(rule) && !rule->l3mdev)
return notifier_from_errno(err);
break;
case FIB_EVENT_ENTRY_ADD:
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
if (router->aborted) {
NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
return notifier_from_errno(-EINVAL);
}
+ if (info->family == AF_INET) {
+ struct fib_entry_notifier_info *fen_info = ptr;
+
+ if (fen_info->fi->fib_nh_is_v6) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
break;
}
/* A RIF is not created for macvlan netdevs. Their MAC is used to
* populate the FDB
*/
- if (netif_is_macvlan(dev))
+ if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
return 0;
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
char recr2_pl[MLXSW_REG_RECR2_LEN];
u32 seed;
- get_random_bytes(&seed, sizeof(seed));
+ seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
mlxsw_reg_recr2_pack(recr2_pl, seed);
mlxsw_sp_mp4_hash_init(recr2_pl);
mlxsw_sp_mp6_hash_init(recr2_pl);
dev = rt->dst.dev;
*saddrp = fl4.saddr;
- *daddrp = rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ *daddrp = rt->rt_gw4;
+ /* can not offload if route has an IPv6 gateway */
+ else if (rt->rt_gw_family == AF_INET6)
+ dev = NULL;
out:
ip_rt_put(rt);
u16 fid_index;
int err = 0;
- if (switchdev_trans_ph_prepare(trans))
+ if (switchdev_trans_ph_commit(trans))
return 0;
bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
struct netdev_hw_addr *hw_addr)
{
struct ocelot *ocelot = port->ocelot;
- struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL);
+ struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
if (!ha)
return -ENOMEM;
ETH_GSTRING_LEN);
}
-static void ocelot_check_stats(struct work_struct *work)
+static void ocelot_update_stats(struct ocelot *ocelot)
{
- struct delayed_work *del_work = to_delayed_work(work);
- struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work);
int i, j;
mutex_lock(&ocelot->stats_lock);
}
}
- cancel_delayed_work(&ocelot->stats_work);
+ mutex_unlock(&ocelot->stats_lock);
+}
+
+static void ocelot_check_stats_work(struct work_struct *work)
+{
+ struct delayed_work *del_work = to_delayed_work(work);
+ struct ocelot *ocelot = container_of(del_work, struct ocelot,
+ stats_work);
+
+ ocelot_update_stats(ocelot);
+
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
-
- mutex_unlock(&ocelot->stats_lock);
}
static void ocelot_get_ethtool_stats(struct net_device *dev,
int i;
/* check and update now */
- ocelot_check_stats(&ocelot->stats_work.work);
+ ocelot_update_stats(ocelot);
/* Copy all counters */
for (i = 0; i < ocelot->num_stats; i++)
ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
ANA_CPUQ_8021_CFG, i);
- INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats);
+ INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
return 0;
dma_object->addr))) {
vxge_os_dma_free(devh->pdev, memblock,
&dma_object->acc_handle);
+ memblock = NULL;
goto exit;
}
nfpcore/nfp_resource.o \
nfpcore/nfp_rtsym.o \
nfpcore/nfp_target.o \
+ ccm.o \
nfp_asm.o \
nfp_app.o \
nfp_app_nic.o \
}
if (knode->sel->off || knode->sel->offshift || knode->sel->offmask ||
knode->sel->offoff || knode->fshift) {
- NL_SET_ERR_MSG_MOD(extack, "variable offseting not supported");
+ NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported");
return false;
}
if (knode->sel->hoff || knode->sel->hmask) {
k = &knode->sel->keys[0];
if (k->offmask) {
- NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offseting not supported");
+ NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported");
return false;
}
if (k->off) {
int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed)
{
+ const u32 cmd = NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET;
struct nfp_net *nn = alink->vnic;
unsigned int i;
int err;
+ err = nfp_net_mbox_lock(nn, alink->abm->prio_map_len);
+ if (err)
+ return err;
+
/* Write data_len and wipe reserved */
nn_writeq(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATALEN,
alink->abm->prio_map_len);
nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATA + i,
packed[i / sizeof(u32)]);
- err = nfp_net_reconfig_mbox(nn,
- NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET);
+ err = nfp_net_mbox_reconfig_and_unlock(nn, cmd);
if (err)
nfp_err(alink->abm->app->cpp,
"setting DSCP -> VQ map failed with error %d\n", err);
}
}
-static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
+static struct net_device *
+nfp_abm_repr_get(struct nfp_app *app, u32 port_id, bool *redir_egress)
{
enum nfp_repr_type rtype;
struct nfp_reprs *reprs;
.eswitch_mode_get = nfp_abm_eswitch_mode_get,
.eswitch_mode_set = nfp_abm_eswitch_mode_set,
- .repr_get = nfp_abm_repr_get,
+ .dev_get = nfp_abm_repr_get,
};
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
-#include <linux/wait.h>
+#include "../ccm.h"
#include "../nfp_app.h"
#include "../nfp_net.h"
#include "fw.h"
#include "main.h"
-#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4)
-
-static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
-{
- u16 used_tags;
-
- used_tags = bpf->tag_alloc_next - bpf->tag_alloc_last;
-
- return used_tags > NFP_BPF_TAG_ALLOC_SPAN;
-}
-
-static int nfp_bpf_alloc_tag(struct nfp_app_bpf *bpf)
-{
- /* All FW communication for BPF is request-reply. To make sure we
- * don't reuse the message ID too early after timeout - limit the
- * number of requests in flight.
- */
- if (nfp_bpf_all_tags_busy(bpf)) {
- cmsg_warn(bpf, "all FW request contexts busy!\n");
- return -EAGAIN;
- }
-
- WARN_ON(__test_and_set_bit(bpf->tag_alloc_next, bpf->tag_allocator));
- return bpf->tag_alloc_next++;
-}
-
-static void nfp_bpf_free_tag(struct nfp_app_bpf *bpf, u16 tag)
-{
- WARN_ON(!__test_and_clear_bit(tag, bpf->tag_allocator));
-
- while (!test_bit(bpf->tag_alloc_last, bpf->tag_allocator) &&
- bpf->tag_alloc_last != bpf->tag_alloc_next)
- bpf->tag_alloc_last++;
-}
-
static struct sk_buff *
nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
{
return size;
}
-static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
-{
- struct cmsg_hdr *hdr;
-
- hdr = (struct cmsg_hdr *)skb->data;
-
- return hdr->type;
-}
-
-static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
-{
- struct cmsg_hdr *hdr;
-
- hdr = (struct cmsg_hdr *)skb->data;
-
- return be16_to_cpu(hdr->tag);
-}
-
-static struct sk_buff *__nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
-{
- unsigned int msg_tag;
- struct sk_buff *skb;
-
- skb_queue_walk(&bpf->cmsg_replies, skb) {
- msg_tag = nfp_bpf_cmsg_get_tag(skb);
- if (msg_tag == tag) {
- nfp_bpf_free_tag(bpf, tag);
- __skb_unlink(skb, &bpf->cmsg_replies);
- return skb;
- }
- }
-
- return NULL;
-}
-
-static struct sk_buff *nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
-{
- struct sk_buff *skb;
-
- nfp_ctrl_lock(bpf->app->ctrl);
- skb = __nfp_bpf_reply(bpf, tag);
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- return skb;
-}
-
-static struct sk_buff *nfp_bpf_reply_drop_tag(struct nfp_app_bpf *bpf, u16 tag)
-{
- struct sk_buff *skb;
-
- nfp_ctrl_lock(bpf->app->ctrl);
- skb = __nfp_bpf_reply(bpf, tag);
- if (!skb)
- nfp_bpf_free_tag(bpf, tag);
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- return skb;
-}
-
-static struct sk_buff *
-nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
- int tag)
-{
- struct sk_buff *skb;
- int i, err;
-
- for (i = 0; i < 50; i++) {
- udelay(4);
- skb = nfp_bpf_reply(bpf, tag);
- if (skb)
- return skb;
- }
-
- err = wait_event_interruptible_timeout(bpf->cmsg_wq,
- skb = nfp_bpf_reply(bpf, tag),
- msecs_to_jiffies(5000));
- /* We didn't get a response - try last time and atomically drop
- * the tag even if no response is matched.
- */
- if (!skb)
- skb = nfp_bpf_reply_drop_tag(bpf, tag);
- if (err < 0) {
- cmsg_warn(bpf, "%s waiting for response to 0x%02x: %d\n",
- err == ERESTARTSYS ? "interrupted" : "error",
- type, err);
- return ERR_PTR(err);
- }
- if (!skb) {
- cmsg_warn(bpf, "timeout waiting for response to 0x%02x\n",
- type);
- return ERR_PTR(-ETIMEDOUT);
- }
-
- return skb;
-}
-
-static struct sk_buff *
-nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
- enum nfp_bpf_cmsg_type type, unsigned int reply_size)
-{
- struct cmsg_hdr *hdr;
- int tag;
-
- nfp_ctrl_lock(bpf->app->ctrl);
- tag = nfp_bpf_alloc_tag(bpf);
- if (tag < 0) {
- nfp_ctrl_unlock(bpf->app->ctrl);
- dev_kfree_skb_any(skb);
- return ERR_PTR(tag);
- }
-
- hdr = (void *)skb->data;
- hdr->ver = CMSG_MAP_ABI_VERSION;
- hdr->type = type;
- hdr->tag = cpu_to_be16(tag);
-
- __nfp_app_ctrl_tx(bpf->app, skb);
-
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- skb = nfp_bpf_cmsg_wait_reply(bpf, type, tag);
- if (IS_ERR(skb))
- return skb;
-
- hdr = (struct cmsg_hdr *)skb->data;
- if (hdr->type != __CMSG_REPLY(type)) {
- cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
- hdr->type, __CMSG_REPLY(type));
- goto err_free;
- }
- /* 0 reply_size means caller will do the validation */
- if (reply_size && skb->len != reply_size) {
- cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
- type, skb->len, reply_size);
- goto err_free;
- }
-
- return skb;
-err_free:
- dev_kfree_skb_any(skb);
- return ERR_PTR(-EIO);
-}
-
static int
nfp_bpf_ctrl_rc_to_errno(struct nfp_app_bpf *bpf,
struct cmsg_reply_map_simple *reply)
req->map_type = cpu_to_be32(map->map_type);
req->map_flags = 0;
- skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_ALLOC,
- sizeof(*reply));
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_ALLOC,
+ sizeof(*reply));
if (IS_ERR(skb))
return PTR_ERR(skb);
req = (void *)skb->data;
req->tid = cpu_to_be32(nfp_map->tid);
- skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_FREE,
- sizeof(*reply));
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_FREE,
+ sizeof(*reply));
if (IS_ERR(skb)) {
cmsg_warn(bpf, "leaking map - I/O error\n");
return;
}
static int
-nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
- enum nfp_bpf_cmsg_type op,
+nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
{
struct nfp_bpf_map *nfp_map = offmap->dev_priv;
memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
map->value_size);
- skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
- nfp_bpf_cmsg_map_reply_size(bpf, 1));
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
+ nfp_bpf_cmsg_map_reply_size(bpf, 1));
if (IS_ERR(skb))
return PTR_ERR(skb);
int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
void *key, void *value, u64 flags)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_UPDATE,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_UPDATE,
key, value, flags, NULL, NULL);
}
int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_DELETE,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_DELETE,
key, NULL, 0, NULL, NULL);
}
int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
void *key, void *value)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_LOOKUP,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_LOOKUP,
key, NULL, 0, NULL, value);
}
int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
void *next_key)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETFIRST,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETFIRST,
NULL, NULL, 0, next_key, NULL);
}
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
void *key, void *next_key)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETNEXT,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETNEXT,
key, NULL, 0, next_key, NULL);
}
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
{
struct nfp_app_bpf *bpf = app->priv;
- unsigned int tag;
if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) {
cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len);
- goto err_free;
+ dev_kfree_skb_any(skb);
+ return;
}
- if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
+ if (nfp_ccm_get_type(skb) == NFP_CCM_TYPE_BPF_BPF_EVENT) {
if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
dev_consume_skb_any(skb);
else
dev_kfree_skb_any(skb);
+ /* skb was released above; it must not be handed to nfp_ccm_rx() */
return;
}
- nfp_ctrl_lock(bpf->app->ctrl);
-
- tag = nfp_bpf_cmsg_get_tag(skb);
- if (unlikely(!test_bit(tag, bpf->tag_allocator))) {
- cmsg_warn(bpf, "cmsg drop - no one is waiting for tag %u!\n",
- tag);
- goto err_unlock;
- }
-
- __skb_queue_tail(&bpf->cmsg_replies, skb);
- wake_up_interruptible_all(&bpf->cmsg_wq);
-
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- return;
-err_unlock:
- nfp_ctrl_unlock(bpf->app->ctrl);
-err_free:
- dev_kfree_skb_any(skb);
+ nfp_ccm_rx(&bpf->ccm, skb);
}
void
nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
{
+ const struct nfp_ccm_hdr *hdr = data;
struct nfp_app_bpf *bpf = app->priv;
- const struct cmsg_hdr *hdr = data;
if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
cmsg_warn(bpf, "cmsg drop - too short %d!\n", len);
return;
}
- if (hdr->type == CMSG_TYPE_BPF_EVENT)
+ if (hdr->type == NFP_CCM_TYPE_BPF_BPF_EVENT)
nfp_bpf_event_output(bpf, data, len);
else
cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
#include <linux/bitops.h>
#include <linux/types.h>
+#include "../ccm.h"
/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
* our FW ABI. In that case we will do translation in the driver.
/*
* Types defined for map related control messages
*/
-#define CMSG_MAP_ABI_VERSION 1
-
-enum nfp_bpf_cmsg_type {
- CMSG_TYPE_MAP_ALLOC = 1,
- CMSG_TYPE_MAP_FREE = 2,
- CMSG_TYPE_MAP_LOOKUP = 3,
- CMSG_TYPE_MAP_UPDATE = 4,
- CMSG_TYPE_MAP_DELETE = 5,
- CMSG_TYPE_MAP_GETNEXT = 6,
- CMSG_TYPE_MAP_GETFIRST = 7,
- CMSG_TYPE_BPF_EVENT = 8,
- __CMSG_TYPE_MAP_MAX,
-};
-
-#define CMSG_TYPE_MAP_REPLY_BIT 7
-#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
/* BPF ABIv2 fixed-length control message fields */
#define CMSG_MAP_KEY_LW 16
CMSG_RC_ERR_MAP_E2BIG = 7,
};
-struct cmsg_hdr {
- u8 type;
- u8 ver;
- __be16 tag;
-};
-
struct cmsg_reply_map_simple {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 rc;
};
struct cmsg_req_map_alloc_tbl {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 key_size; /* in bytes */
__be32 value_size; /* in bytes */
__be32 max_entries;
};
struct cmsg_req_map_free_tbl {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 tid;
};
};
struct cmsg_req_map_op {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 tid;
__be32 count;
__be32 flags;
};
struct cmsg_bpf_event {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 cpu_id;
__be64 map_ptr;
__be32 data_size;
bpf->app = app;
app->priv = bpf;
- skb_queue_head_init(&bpf->cmsg_replies);
- init_waitqueue_head(&bpf->cmsg_wq);
INIT_LIST_HEAD(&bpf->map_list);
- err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
+ err = nfp_ccm_init(&bpf->ccm, app);
if (err)
goto err_free_bpf;
+ err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
+ if (err)
+ goto err_clean_ccm;
+
nfp_bpf_init_capabilities(bpf);
err = nfp_bpf_parse_capabilities(app);
err_free_neutral_maps:
rhashtable_destroy(&bpf->maps_neutral);
+err_clean_ccm:
+ nfp_ccm_clean(&bpf->ccm);
err_free_bpf:
kfree(bpf);
return err;
struct nfp_app_bpf *bpf = app->priv;
bpf_offload_dev_destroy(bpf->bpf_dev);
- WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
+ nfp_ccm_clean(&bpf->ccm);
WARN_ON(!list_empty(&bpf->map_list));
WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
rhashtable_free_and_destroy(&bpf->maps_neutral,
#include <linux/types.h>
#include <linux/wait.h>
+#include "../ccm.h"
#include "../nfp_asm.h"
#include "fw.h"
/**
* struct nfp_app_bpf - bpf app priv structure
* @app: backpointer to the app
+ * @ccm: common control message handler data
*
* @bpf_dev: BPF offload device handle
*
- * @tag_allocator: bitmap of control message tags in use
- * @tag_alloc_next: next tag bit to allocate
- * @tag_alloc_last: next tag bit to be freed
- *
- * @cmsg_replies: received cmsg replies waiting to be consumed
- * @cmsg_wq: work queue for waiting for cmsg replies
- *
* @cmsg_key_sz: size of key in cmsg element array
* @cmsg_val_sz: size of value in cmsg element array
*
*/
struct nfp_app_bpf {
struct nfp_app *app;
+ struct nfp_ccm ccm;
struct bpf_offload_dev *bpf_dev;
- DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
- u16 tag_alloc_next;
- u16 tag_alloc_last;
-
- struct sk_buff_head cmsg_replies;
- struct wait_queue_head cmsg_wq;
-
unsigned int cmsg_key_sz;
unsigned int cmsg_val_sz;
#include <net/tc_act/tc_mirred.h>
#include "main.h"
+#include "../ccm.h"
#include "../nfp_app.h"
#include "../nfp_net_ctrl.h"
#include "../nfp_net.h"
if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
return -EINVAL;
- if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
+ if (cbe->hdr.ver != NFP_CCM_ABI_VERSION)
return -EINVAL;
rcu_read_lock();
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2019 Netronome Systems, Inc. */
+
+#include <linux/bitops.h>
+
+#include "ccm.h"
+#include "nfp_app.h"
+#include "nfp_net.h"
+
+#define NFP_CCM_TYPE_REPLY_BIT 7
+#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req))
+
+#define ccm_warn(app, msg...) nn_dp_warn(&(app)->ctrl->dp, msg)
+
+#define NFP_CCM_TAG_ALLOC_SPAN (U16_MAX / 4)
+
+/* Tell whether the distance between tag_alloc_last and tag_alloc_next is
+ * larger than NFP_CCM_TAG_ALLOC_SPAN.
+ */
+static bool nfp_ccm_all_tags_busy(struct nfp_ccm *ccm)
+{
+	/* Result is truncated to u16, matching the width of both counters. */
+	u16 in_flight = ccm->tag_alloc_next - ccm->tag_alloc_last;
+
+	return in_flight > NFP_CCM_TAG_ALLOC_SPAN;
+}
+
+/* Reserve the next tag for a new FW request.
+ *
+ * Returns the reserved tag, or -EAGAIN when nfp_ccm_all_tags_busy() reports
+ * that the in-flight window is exhausted.
+ */
+static int nfp_ccm_alloc_tag(struct nfp_ccm *ccm)
+{
+	u16 tag;
+
+	/* CCM is for FW communication which is request-reply. To make sure
+	 * we don't reuse the message ID too early after timeout - limit the
+	 * number of requests in flight.
+	 */
+	if (unlikely(nfp_ccm_all_tags_busy(ccm))) {
+		ccm_warn(ccm->app, "all FW request contexts busy!\n");
+		return -EAGAIN;
+	}
+
+	tag = ccm->tag_alloc_next++;
+	/* The bit must not already be set; warn if the bitmap disagrees. */
+	WARN_ON(__test_and_set_bit(tag, ccm->tag_allocator));
+
+	return tag;
+}
+
+/* Release @tag and advance tag_alloc_last past every tag that is no longer
+ * marked in the bitmap, stopping at tag_alloc_next.
+ */
+static void nfp_ccm_free_tag(struct nfp_ccm *ccm, u16 tag)
+{
+	/* Freeing a tag that was never allocated indicates a bookkeeping bug. */
+	WARN_ON(!__test_and_clear_bit(tag, ccm->tag_allocator));
+
+	while (ccm->tag_alloc_last != ccm->tag_alloc_next &&
+	       !test_bit(ccm->tag_alloc_last, ccm->tag_allocator))
+		ccm->tag_alloc_last++;
+}
+
+/* Search the queued replies for one carrying @tag.
+ *
+ * On a match the tag is freed, the skb is unlinked from the reply queue and
+ * returned. Returns NULL when no queued reply matches.
+ */
+static struct sk_buff *__nfp_ccm_reply(struct nfp_ccm *ccm, u16 tag)
+{
+	struct sk_buff *reply;
+
+	skb_queue_walk(&ccm->replies, reply) {
+		if (nfp_ccm_get_tag(reply) != tag)
+			continue;
+
+		nfp_ccm_free_tag(ccm, tag);
+		__skb_unlink(reply, &ccm->replies);
+		return reply;
+	}
+
+	return NULL;
+}
+
+/* Look up the reply for @tag with the control channel lock held. */
+static struct sk_buff *
+nfp_ccm_reply(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag)
+{
+	struct sk_buff *reply;
+
+	nfp_ctrl_lock(app->ctrl);
+	reply = __nfp_ccm_reply(ccm, tag);
+	nfp_ctrl_unlock(app->ctrl);
+
+	return reply;
+}
+
+/* Look up the reply for @tag and release the tag in the same locked section,
+ * whether or not a reply was found.
+ */
+static struct sk_buff *
+nfp_ccm_reply_drop_tag(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag)
+{
+	struct sk_buff *reply;
+
+	nfp_ctrl_lock(app->ctrl);
+	reply = __nfp_ccm_reply(ccm, tag);
+	/* __nfp_ccm_reply() only frees the tag when it finds a match. */
+	if (reply == NULL)
+		nfp_ccm_free_tag(ccm, tag);
+	nfp_ctrl_unlock(app->ctrl);
+
+	return reply;
+}
+
+/* Wait for the FW reply to the request identified by @tag.
+ *
+ * Polls briefly first (50 iterations of udelay(4)), then sleeps interruptibly
+ * on ccm->wq for up to 5000 ms. The tag is always released before returning.
+ *
+ * Returns the reply skb on success, ERR_PTR(-ETIMEDOUT) when no reply
+ * arrived, or ERR_PTR() of the negative value returned by the wait (for
+ * example -ERESTARTSYS when interrupted by a signal).
+ */
+static struct sk_buff *
+nfp_ccm_wait_reply(struct nfp_ccm *ccm, struct nfp_app *app,
+		   enum nfp_ccm_type type, int tag)
+{
+	struct sk_buff *skb;
+	int i, err;
+
+	for (i = 0; i < 50; i++) {
+		udelay(4);
+		skb = nfp_ccm_reply(ccm, app, tag);
+		if (skb)
+			return skb;
+	}
+
+	err = wait_event_interruptible_timeout(ccm->wq,
+					       skb = nfp_ccm_reply(ccm, app,
+								   tag),
+					       msecs_to_jiffies(5000));
+	/* We didn't get a response - try last time and atomically drop
+	 * the tag even if no response is matched.
+	 */
+	if (!skb)
+		skb = nfp_ccm_reply_drop_tag(ccm, app, tag);
+	if (err < 0) {
+		/* err is a negative errno here, so compare with -ERESTARTSYS. */
+		ccm_warn(app, "%s waiting for response to 0x%02x: %d\n",
+			 err == -ERESTARTSYS ? "interrupted" : "error",
+			 type, err);
+		/* The last-chance lookup above may still have found a reply;
+		 * we are reporting failure, so don't leak it.
+		 */
+		if (skb)
+			dev_kfree_skb_any(skb);
+		return ERR_PTR(err);
+	}
+	if (!skb) {
+		ccm_warn(app, "timeout waiting for response to 0x%02x\n", type);
+		return ERR_PTR(-ETIMEDOUT);
+	}
+
+	return skb;
+}
+
+/* Send @skb to the FW as a request of @type and wait for the matching reply.
+ *
+ * The CCM header at the start of @skb's data is filled in here (version,
+ * type and a freshly allocated tag). @skb is consumed: it is either handed
+ * to __nfp_app_ctrl_tx() or freed when no tag can be allocated.
+ *
+ * Returns the reply skb, or an ERR_PTR(): the tag allocation error, the
+ * error from nfp_ccm_wait_reply(), or -EIO when the reply has the wrong type
+ * or (for non-zero @reply_size) the wrong length.
+ */
+struct sk_buff *
+nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb,
+		    enum nfp_ccm_type type, unsigned int reply_size)
+{
+	struct nfp_app *app = ccm->app;
+	struct nfp_ccm_hdr *hdr;
+	int reply_type, tag;
+
+	/* Tag allocation and transmit share one locked section. */
+	nfp_ctrl_lock(app->ctrl);
+	tag = nfp_ccm_alloc_tag(ccm);
+	if (tag >= 0) {
+		hdr = (void *)skb->data;
+		hdr->ver = NFP_CCM_ABI_VERSION;
+		hdr->type = type;
+		hdr->tag = cpu_to_be16(tag);
+
+		__nfp_app_ctrl_tx(app, skb);
+	}
+	nfp_ctrl_unlock(app->ctrl);
+
+	if (tag < 0) {
+		dev_kfree_skb_any(skb);
+		return ERR_PTR(tag);
+	}
+
+	skb = nfp_ccm_wait_reply(ccm, app, type, tag);
+	if (IS_ERR(skb))
+		return skb;
+
+	reply_type = nfp_ccm_get_type(skb);
+	if (reply_type != __NFP_CCM_REPLY(type)) {
+		ccm_warn(app, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
+			 reply_type, __NFP_CCM_REPLY(type));
+		goto err_drop_reply;
+	}
+	/* 0 reply_size means caller will do the validation */
+	if (reply_size && skb->len != reply_size) {
+		ccm_warn(app, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
+			 type, skb->len, reply_size);
+		goto err_drop_reply;
+	}
+
+	return skb;
+
+err_drop_reply:
+	dev_kfree_skb_any(skb);
+	return ERR_PTR(-EIO);
+}
+
+/* Receive path for CCM replies.
+ *
+ * Messages shorter than the CCM header, or whose tag is not currently
+ * allocated, are dropped with a warning. Anything else is queued on
+ * ccm->replies and all waiters on ccm->wq are woken.
+ */
+void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb)
+{
+	struct nfp_app *app = ccm->app;
+	unsigned int tag;
+	bool awaited;
+
+	if (unlikely(skb->len < sizeof(struct nfp_ccm_hdr))) {
+		ccm_warn(app, "cmsg drop - too short %d!\n", skb->len);
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	nfp_ctrl_lock(app->ctrl);
+
+	tag = nfp_ccm_get_tag(skb);
+	awaited = test_bit(tag, ccm->tag_allocator);
+	if (likely(awaited)) {
+		__skb_queue_tail(&ccm->replies, skb);
+		wake_up_interruptible_all(&ccm->wq);
+	} else {
+		ccm_warn(app, "cmsg drop - no one is waiting for tag %u!\n",
+			 tag);
+	}
+
+	nfp_ctrl_unlock(app->ctrl);
+
+	/* Unclaimed replies are freed once the lock has been dropped. */
+	if (unlikely(!awaited))
+		dev_kfree_skb_any(skb);
+}
+
+/* Prepare @ccm for use by @app: record the app and initialise the wait queue
+ * and the reply queue. Always returns 0.
+ *
+ * NOTE(review): the tag bitmap and tag counters are not touched here -
+ * presumably callers embed struct nfp_ccm in zeroed memory; confirm.
+ */
+int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app)
+{
+	init_waitqueue_head(&ccm->wq);
+	skb_queue_head_init(&ccm->replies);
+	ccm->app = app;
+
+	return 0;
+}
+
+/* Tear-down check: warn if replies are still queued. Nothing is freed here. */
+void nfp_ccm_clean(struct nfp_ccm *ccm)
+{
+	bool replies_pending = !skb_queue_empty(&ccm->replies);
+
+	WARN_ON(replies_pending);
+}
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2016-2019 Netronome Systems, Inc. */
+
+#ifndef NFP_CCM_H
+#define NFP_CCM_H 1
+
+#include <linux/bitmap.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+
+struct nfp_app;
+
+/* Firmware ABI */
+
+enum nfp_ccm_type { /* request types; ccm.c expects replies to carry the request type with bit 7 set */
+ NFP_CCM_TYPE_BPF_MAP_ALLOC = 1,
+ NFP_CCM_TYPE_BPF_MAP_FREE = 2,
+ NFP_CCM_TYPE_BPF_MAP_LOOKUP = 3,
+ NFP_CCM_TYPE_BPF_MAP_UPDATE = 4,
+ NFP_CCM_TYPE_BPF_MAP_DELETE = 5,
+ NFP_CCM_TYPE_BPF_MAP_GETNEXT = 6,
+ NFP_CCM_TYPE_BPF_MAP_GETFIRST = 7,
+ NFP_CCM_TYPE_BPF_BPF_EVENT = 8,
+ __NFP_CCM_TYPE_MAX, /* one past the highest type defined above */
+};
+
+#define NFP_CCM_ABI_VERSION 1
+
+struct nfp_ccm_hdr { /* header found at the start of every CCM message's skb data */
+ u8 type; /* message type; requests are filled in from enum nfp_ccm_type */
+ u8 ver; /* set to NFP_CCM_ABI_VERSION on transmit */
+ __be16 tag; /* request tag, big endian; replies are matched to requests by it */
+};
+
+/* Read the message type from the CCM header at the start of @skb's data. */
+static inline u8 nfp_ccm_get_type(struct sk_buff *skb)
+{
+	const struct nfp_ccm_hdr *ccm_hdr = (struct nfp_ccm_hdr *)skb->data;
+
+	return ccm_hdr->type;
+}
+
+/* Read the tag from the CCM header at the start of @skb's data, converted
+ * from big endian to CPU byte order.
+ */
+static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb)
+{
+	const struct nfp_ccm_hdr *ccm_hdr = (struct nfp_ccm_hdr *)skb->data;
+
+	return be16_to_cpu(ccm_hdr->tag);
+}
+
+/* Implementation */
+
+/**
+ * struct nfp_ccm - common control message handling
+ * @app: backpointer to the app; its control vNIC is used for locking,
+ * transmit and warnings
+ *
+ * @tag_allocator: bitmap of control message tags in use
+ * @tag_alloc_next: next tag bit to allocate
+ * @tag_alloc_last: next tag bit to be freed
+ *
+ * @replies: received cmsg replies waiting to be consumed
+ * @wq: wait queue for waiting for cmsg replies
+ */
+struct nfp_ccm {
+ struct nfp_app *app;
+
+ DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
+ u16 tag_alloc_next;
+ u16 tag_alloc_last;
+
+ struct sk_buff_head replies;
+ struct wait_queue_head wq;
+};
+
+int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app);
+void nfp_ccm_clean(struct nfp_ccm *ccm);
+void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb);
+struct sk_buff *
+nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb,
+ enum nfp_ccm_type type, unsigned int reply_size);
+#endif
rtnl_lock();
rcu_read_lock();
- netdev = nfp_app_repr_get(app, be32_to_cpu(msg->portnum));
+ netdev = nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL);
rcu_read_unlock();
if (!netdev) {
nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
msg = nfp_flower_cmsg_get_data(skb);
rcu_read_lock();
- exists = !!nfp_app_repr_get(app, be32_to_cpu(msg->portnum));
+ exists = !!nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL);
rcu_read_unlock();
if (!exists) {
nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
wake_up(&priv->reify_wait_queue);
}
+/* Handle a merge hint control message from the firmware.
+ *
+ * Validates the message length and the number of flows (exactly two are
+ * supported), resolves each flow from its host context id and asks
+ * nfp_flower_merge_offloaded_flows() to merge them. Only -ENOMEM from the
+ * merge is reported; the whole lookup and merge runs under RTNL.
+ */
+static void
+nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb)
+{
+	unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
+	struct nfp_flower_cmsg_merge_hint *msg;
+	struct nfp_fl_payload *sub_flows[2];
+	int err, i, flow_cnt;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	/* The fixed header must be present before msg->count can be read. */
+	if (msg_len < sizeof(*msg)) {
+		nfp_flower_cmsg_warn(app, "Merge hint ctrl msg too short - %u bytes but expect %zu\n",
+				     msg_len, sizeof(*msg));
+		return;
+	}
+
+	/* msg->count starts at 0 and always assumes at least 1 entry. */
+	flow_cnt = msg->count + 1;
+
+	if (msg_len < struct_size(msg, flow, flow_cnt)) {
+		nfp_flower_cmsg_warn(app, "Merge hint ctrl msg too short - %u bytes but expect %zu\n",
+				     msg_len, struct_size(msg, flow, flow_cnt));
+		return;
+	}
+
+	if (flow_cnt != 2) {
+		nfp_flower_cmsg_warn(app, "Merge hint contains %d flows - two are expected\n",
+				     flow_cnt);
+		return;
+	}
+
+	rtnl_lock();
+	for (i = 0; i < flow_cnt; i++) {
+		u32 ctx = be32_to_cpu(msg->flow[i].host_ctx);
+
+		sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx);
+		if (!sub_flows[i]) {
+			nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n");
+			goto err_rtnl_unlock;
+		}
+	}
+
+	err = nfp_flower_merge_offloaded_flows(app, sub_flows[0], sub_flows[1]);
+	/* Only warn on memory fail. Hint veto will not break functionality. */
+	if (err == -ENOMEM)
+		nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n");
+
+err_rtnl_unlock:
+	rtnl_unlock();
+}
+
static void
nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
{
case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
nfp_flower_cmsg_portmod_rx(app, skb);
break;
+ case NFP_FLOWER_CMSG_TYPE_MERGE_HINT:
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE) {
+ nfp_flower_cmsg_merge_hint_rx(app, skb);
+ break;
+ }
+ goto err_default;
case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
nfp_tunnel_request_route(app, skb);
break;
}
/* fall through */
default:
+err_default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
goto out;
/* Types defined for port related control messages */
enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0,
+ NFP_FLOWER_CMSG_TYPE_FLOW_MOD = 1,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
NFP_FLOWER_CMSG_TYPE_LAG_CONFIG = 4,
NFP_FLOWER_CMSG_TYPE_PORT_REIFY = 6,
NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
+ NFP_FLOWER_CMSG_TYPE_MERGE_HINT = 9,
NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10,
NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11,
NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12,
#define NFP_FLOWER_CMSG_PORTREIFY_INFO_EXIST BIT(0)
+/* NFP_FLOWER_CMSG_TYPE_MERGE_HINT */
+struct nfp_flower_cmsg_merge_hint {
+ u8 reserved[3];
+ u8 count; /* number of entries in flow[] minus one */
+ struct {
+ __be32 host_ctx; /* host context id used to look up the offloaded flow */
+ __be64 host_cookie;
+ } __packed flow[0];
+};
+
enum nfp_flower_cmsg_port_type {
NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0,
NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1,
#define NFP_FLOWER_CMSG_PORT_PCIE_Q GENMASK(5, 0)
#define NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM GENMASK(7, 0)
+/* Build the cmsg port id for an internal port: port type OTHER_PORT combined
+ * with @internal_port in the physical port number field.
+ */
+static inline u32 nfp_flower_internal_port_get_port_id(u8 internal_port)
+{
+	u32 port_id;
+
+	port_id = FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE,
+			     NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT);
+	port_id |= FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM,
+			      internal_port);
+
+	return port_id;
+}
+
static inline u32 nfp_flower_cmsg_phys_port(u8 phys_port)
{
return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, phys_port) |
#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
+#define NFP_MIN_INT_PORT_ID 1
+#define NFP_MAX_INT_PORT_ID 256
+
static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
{
return "FLOWER";
return DEVLINK_ESWITCH_MODE_SWITCHDEV;
}
+/* Find the internal port id already assigned to @netdev.
+ *
+ * Walks the IDR under rcu_read_lock(). Returns the id, or 0 when @netdev has
+ * no entry.
+ */
+static int
+nfp_flower_lookup_internal_port_id(struct nfp_flower_priv *priv,
+				   struct net_device *netdev)
+{
+	struct net_device *candidate;
+	int idx, found = 0;
+
+	rcu_read_lock();
+	idr_for_each_entry(&priv->internal_ports.port_ids, candidate, idx) {
+		if (candidate != netdev)
+			continue;
+
+		found = idx;
+		break;
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+/* Return the internal port id for @netdev, allocating one if it has none.
+ *
+ * New ids are allocated from the range starting at NFP_MIN_INT_PORT_ID and
+ * bounded by NFP_MAX_INT_PORT_ID, under internal_ports.lock. Returns the
+ * existing id, or whatever idr_alloc() returns for a new allocation (the
+ * caller treats a negative value as failure).
+ */
+static int
+nfp_flower_get_internal_port_id(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_internal_ports *ports = &priv->internal_ports;
+	int port_id;
+
+	port_id = nfp_flower_lookup_internal_port_id(priv, netdev);
+	if (port_id > 0)
+		return port_id;
+
+	idr_preload(GFP_ATOMIC);
+	spin_lock_bh(&ports->lock);
+	port_id = idr_alloc(&ports->port_ids, netdev, NFP_MIN_INT_PORT_ID,
+			    NFP_MAX_INT_PORT_ID, GFP_ATOMIC);
+	spin_unlock_bh(&ports->lock);
+	idr_preload_end();
+
+	return port_id;
+}
+
+/* Translate @netdev into the port id used in flower control messages.
+ *
+ * Representors report their own port id. Netdevs accepted by
+ * nfp_flower_internal_port_can_offload() get an internal port id (allocated
+ * on first use). Returns 0 for any other netdev or when no internal id could
+ * be obtained.
+ */
+u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app,
+				       struct net_device *netdev)
+{
+	int ext_port;
+
+	if (nfp_netdev_is_nfp_repr(netdev))
+		return nfp_repr_get_port_id(netdev);
+
+	if (!nfp_flower_internal_port_can_offload(app, netdev))
+		return 0;
+
+	ext_port = nfp_flower_get_internal_port_id(app, netdev);
+	if (ext_port < 0)
+		return 0;
+
+	return nfp_flower_internal_port_get_port_id(ext_port);
+}
+
+/* Reverse lookup: return the netdev stored for internal port @port_id, or
+ * NULL if idr_find() has no entry for it.
+ */
+static struct net_device *
+nfp_flower_get_netdev_from_internal_port_id(struct nfp_app *app, int port_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = idr_find(&priv->internal_ports.port_ids, port_id);
+	rcu_read_unlock();
+
+	return dev;
+}
+
+/* Drop the internal port id assigned to @netdev, if it has one. */
+static void
+nfp_flower_free_internal_port_id(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_internal_ports *ports = &priv->internal_ports;
+	int port_id = nfp_flower_lookup_internal_port_id(priv, netdev);
+
+	/* The lookup returns 0 when no id is assigned. */
+	if (port_id == 0)
+		return;
+
+	spin_lock_bh(&ports->lock);
+	idr_remove(&ports->port_ids, port_id);
+	spin_unlock_bh(&ports->lock);
+}
+
+/* Netdev notifier hook: release the internal port id of an offloadable
+ * netdev when it unregisters. Always returns NOTIFY_OK.
+ */
+static int
+nfp_flower_internal_port_event_handler(struct nfp_app *app,
+				       struct net_device *netdev,
+				       unsigned long event)
+{
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_OK;
+
+	if (nfp_flower_internal_port_can_offload(app, netdev))
+		nfp_flower_free_internal_port_id(app, netdev);
+
+	return NOTIFY_OK;
+}
+
+/* Initialise the internal port id IDR and the lock guarding its updates. */
+static void nfp_flower_internal_port_init(struct nfp_flower_priv *priv)
+{
+	struct nfp_fl_internal_ports *ports = &priv->internal_ports;
+
+	idr_init(&ports->port_ids);
+	spin_lock_init(&ports->lock);
+}
+
+static void nfp_flower_internal_port_cleanup(struct nfp_flower_priv *priv)
+{
+ idr_destroy(&priv->internal_ports.port_ids); /* counterpart of the idr_init() in nfp_flower_internal_port_init() */
+}
+
static struct nfp_flower_non_repr_priv *
nfp_flower_non_repr_priv_lookup(struct nfp_app *app, struct net_device *netdev)
{
}
static struct net_device *
-nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
+nfp_flower_dev_get(struct nfp_app *app, u32 port_id, bool *redir_egress)
{
enum nfp_repr_type repr_type;
struct nfp_reprs *reprs;
u8 port = 0;
+ /* Check if the port is internal. */
+ if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port_id) ==
+ NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT) {
+ if (redir_egress)
+ *redir_egress = true;
+ port = FIELD_GET(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, port_id);
+ return nfp_flower_get_netdev_from_internal_port_id(app, port);
+ }
+
repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port);
if (repr_type > NFP_REPR_TYPE_MAX)
return NULL;
goto err_cleanup_metadata;
}
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) {
+ /* Tell the firmware that the driver supports flow merging. */
+ err = nfp_rtsym_write_le(app->pf->rtbl,
+ "_abi_flower_merge_hint_enable", 1);
+ if (!err) {
+ app_priv->flower_ext_feats |= NFP_FL_FEATS_FLOW_MERGE;
+ nfp_flower_internal_port_init(app_priv);
+ } else if (err == -ENOENT) {
+ nfp_warn(app->cpp, "Flow merge not supported by FW.\n");
+ } else {
+ goto err_lag_clean;
+ }
+ } else {
+ nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n");
+ }
+
INIT_LIST_HEAD(&app_priv->indr_block_cb_priv);
INIT_LIST_HEAD(&app_priv->non_repr_priv);
return 0;
+err_lag_clean:
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+ nfp_flower_lag_cleanup(&app_priv->nfp_lag);
err_cleanup_metadata:
nfp_flower_metadata_cleanup(app);
err_free_app_priv:
if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
nfp_flower_lag_cleanup(&app_priv->nfp_lag);
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE)
+ nfp_flower_internal_port_cleanup(app_priv);
+
nfp_flower_metadata_cleanup(app);
vfree(app->priv);
app->priv = NULL;
if (ret & NOTIFY_STOP_MASK)
return ret;
+ ret = nfp_flower_internal_port_event_handler(app, netdev, event);
+ if (ret & NOTIFY_STOP_MASK)
+ return ret;
+
return nfp_tunnel_mac_event_handler(app, netdev, event, ptr);
}
.sriov_disable = nfp_flower_sriov_disable,
.eswitch_mode_get = eswitch_mode_get,
- .repr_get = nfp_flower_repr_get,
+ .dev_get = nfp_flower_dev_get,
.setup_tc = nfp_flower_setup_tc,
};
#define NFP_FL_NBI_MTU_SETTING BIT(1)
#define NFP_FL_FEATS_GENEVE_OPT BIT(2)
#define NFP_FL_FEATS_VLAN_PCP BIT(3)
+#define NFP_FL_FEATS_FLOW_MOD BIT(5)
+#define NFP_FL_FEATS_FLOW_MERGE BIT(30)
#define NFP_FL_FEATS_LAG BIT(31)
struct nfp_fl_mask_id {
struct sk_buff_head retrans_skbs;
};
+/**
+ * struct nfp_fl_internal_ports - Flower APP priv data for additional ports
+ * @port_ids: IDR mapping internal port ids to the netdevs they were
+ * assigned to
+ * @lock: Lock taken around allocation and removal of ids in @port_ids
+ */
+struct nfp_fl_internal_ports {
+ struct idr port_ids;
+ spinlock_t lock;
+};
+
/**
* struct nfp_flower_priv - Flower APP per-vNIC priv data
* @app: Back pointer to app
* @flow_table: Hash table used to store flower rules
* @stats: Stored stats updates for flower rules
* @stats_lock: Lock for flower rule stats updates
+ * @stats_ctx_table: Hash table to map stats contexts to its flow rule
* @cmsg_work: Workqueue for control messages processing
* @cmsg_skbs_high: List of higher priority skbs for control message
* processing
* @non_repr_priv: List of offloaded non-repr ports and their priv data
* @active_mem_unit: Current active memory unit for flower rules
* @total_mem_units: Total number of available memory units for flower rules
+ * @internal_ports: Internal port ids used in offloaded rules
*/
struct nfp_flower_priv {
struct nfp_app *app;
struct rhashtable flow_table;
struct nfp_fl_stats *stats;
spinlock_t stats_lock; /* lock stats */
+ struct rhashtable stats_ctx_table;
struct work_struct cmsg_work;
struct sk_buff_head cmsg_skbs_high;
struct sk_buff_head cmsg_skbs_low;
struct list_head non_repr_priv;
unsigned int active_mem_unit;
unsigned int total_mem_units;
+ struct nfp_fl_internal_ports internal_ports;
};
/**
char *unmasked_data;
char *mask_data;
char *action_data;
+ struct list_head linked_flows;
+ bool in_hw;
+};
+
+struct nfp_fl_payload_link {
+ /* A link contains a pointer to a merge flow and an associated sub_flow.
+ * Each merge flow will feature in 2 links to its underlying sub_flows.
+ * A sub_flow will have at least 1 link to a merge flow or more if it
+ * has been used to create multiple merge flows.
+ *
+ * For a merge flow, 'linked_flows' in its nfp_fl_payload struct lists
+ * all links to sub_flows (sub_flow.flow) via merge_flow.list.
+ * For a sub_flow, 'linked_flows' gives all links to merge flows it has
+ * formed (merge_flow.flow) via sub_flow.list.
+ */
+ struct {
+ struct list_head list;
+ struct nfp_fl_payload *flow;
+ } merge_flow, sub_flow;
+};
extern const struct rhashtable_params nfp_flower_table_params;
__be64 stats_cookie;
};
+/* Decide whether @netdev may be used as an internal port.
+ *
+ * True only when the flow merge feature is enabled and @netdev has
+ * rtnl_link_ops whose kind is "openvswitch".
+ */
+static inline bool
+nfp_flower_internal_port_can_offload(struct nfp_app *app,
+				     struct net_device *netdev)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+
+	if (!(app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE))
+		return false;
+
+	return netdev->rtnl_link_ops &&
+	       !strcmp(netdev->rtnl_link_ops->kind, "openvswitch");
+}
+
+/* A merged flow uses its own address as its cookie. Cookies handed to us by
+ * TC flower are addresses of other allocated memory, so the two should never
+ * collide and a self-referencing cookie identifies a merge flow.
+ */
+static inline bool nfp_flower_is_merge_flow(struct nfp_fl_payload *flow_pay)
+{
+	unsigned long own_addr = (unsigned long)flow_pay;
+
+	return own_addr == flow_pay->tc_flower_cookie;
+}
+
int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
unsigned int host_ctx_split);
void nfp_flower_metadata_cleanup(struct nfp_app *app);
int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
enum tc_setup_type type, void *type_data);
+int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
+ struct nfp_fl_payload *sub_flow1,
+ struct nfp_fl_payload *sub_flow2);
int nfp_flower_compile_flow_match(struct nfp_app *app,
struct tc_cls_flower_offload *flow,
struct nfp_fl_key_ls *key_ls,
struct tc_cls_flower_offload *flow,
struct nfp_fl_payload *nfp_flow,
struct net_device *netdev);
+void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv,
+ struct nfp_fl_payload *nfp_flow);
int nfp_modify_flow_metadata(struct nfp_app *app,
struct nfp_fl_payload *nfp_flow);
nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
struct net_device *netdev);
struct nfp_fl_payload *
+nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id);
+struct nfp_fl_payload *
nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv);
void
nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev);
+u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app,
+ struct net_device *netdev);
#endif
struct nfp_fl_payload *nfp_flow,
enum nfp_flower_tun_type tun_type)
{
- u32 cmsg_port = 0;
+ u32 port_id;
int err;
u8 *ext;
u8 *msk;
- if (nfp_netdev_is_nfp_repr(netdev))
- cmsg_port = nfp_repr_get_port_id(netdev);
+ port_id = nfp_flower_get_port_id_from_netdev(app, netdev);
memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
memset(nfp_flow->mask_data, 0, key_ls->key_size);
/* Populate Exact Port data. */
err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
- cmsg_port, false, tun_type);
+ port_id, false, tun_type);
if (err)
return err;
/* Populate Mask Port Data. */
err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
- cmsg_port, true, tun_type);
+ port_id, true, tun_type);
if (err)
return err;
unsigned long cookie;
};
+struct nfp_fl_stats_ctx_to_flow { /* entry of priv->stats_ctx_table */
+ struct rhash_head ht_node; /* hash table linkage */
+ u32 stats_cxt; /* hash key: stats context id of the flow */
+ struct nfp_fl_payload *flow; /* flow that was given this stats context */
+};
+
+static const struct rhashtable_params stats_ctx_table_params = { /* keyed by the u32 stats context id */
+ .key_offset = offsetof(struct nfp_fl_stats_ctx_to_flow, stats_cxt),
+ .head_offset = offsetof(struct nfp_fl_stats_ctx_to_flow, ht_node),
+ .key_len = sizeof(u32), /* matches the type of stats_cxt */
+};
+
static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
{
struct nfp_flower_priv *priv = app->priv;
if (!mask_entry)
return false;
- if (meta_flags)
- *meta_flags &= ~NFP_FL_META_FLAG_MANAGE_MASK;
-
*mask_id = mask_entry->mask_id;
mask_entry->ref_cnt--;
if (!mask_entry->ref_cnt) {
struct nfp_fl_payload *nfp_flow,
struct net_device *netdev)
{
+ struct nfp_fl_stats_ctx_to_flow *ctx_entry;
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_payload *check_entry;
u8 new_mask_id;
u32 stats_cxt;
+ int err;
- if (nfp_get_stats_entry(app, &stats_cxt))
- return -ENOENT;
+ err = nfp_get_stats_entry(app, &stats_cxt);
+ if (err)
+ return err;
nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt);
nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
nfp_flow->ingress_dev = netdev;
+ ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL);
+ if (!ctx_entry) {
+ err = -ENOMEM;
+ goto err_release_stats;
+ }
+
+ ctx_entry->stats_cxt = stats_cxt;
+ ctx_entry->flow = nfp_flow;
+
+ if (rhashtable_insert_fast(&priv->stats_ctx_table, &ctx_entry->ht_node,
+ stats_ctx_table_params)) {
+ err = -ENOMEM;
+ goto err_free_ctx_entry;
+ }
+
new_mask_id = 0;
if (!nfp_check_mask_add(app, nfp_flow->mask_data,
nfp_flow->meta.mask_len,
&nfp_flow->meta.flags, &new_mask_id)) {
- if (nfp_release_stats_entry(app, stats_cxt))
- return -EINVAL;
- return -ENOENT;
+ err = -ENOENT;
+ goto err_remove_rhash;
}
nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev);
if (check_entry) {
- if (nfp_release_stats_entry(app, stats_cxt))
- return -EINVAL;
-
- if (!nfp_check_mask_remove(app, nfp_flow->mask_data,
- nfp_flow->meta.mask_len,
- NULL, &new_mask_id))
- return -EINVAL;
-
- return -EEXIST;
+ err = -EEXIST;
+ goto err_remove_mask;
}
return 0;
+
+err_remove_mask:
+ nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len,
+ NULL, &new_mask_id);
+err_remove_rhash:
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table,
+ &ctx_entry->ht_node,
+ stats_ctx_table_params));
+err_free_ctx_entry:
+ kfree(ctx_entry);
+err_release_stats:
+ nfp_release_stats_entry(app, stats_cxt);
+
+ return err;
+}
+
+/* Stamp @nfp_flow with the current flower version, bump the version counter
+ * and clear the MANAGE_MASK meta flag.
+ */
+void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv,
+				struct nfp_fl_payload *nfp_flow)
+{
+	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+	priv->flower_version += 1;
+
+	nfp_flow->meta.flags &= ~NFP_FL_META_FLAG_MANAGE_MASK;
+}
int nfp_modify_flow_metadata(struct nfp_app *app,
struct nfp_fl_payload *nfp_flow)
{
+ struct nfp_fl_stats_ctx_to_flow *ctx_entry;
struct nfp_flower_priv *priv = app->priv;
u8 new_mask_id = 0;
u32 temp_ctx_id;
+ __nfp_modify_flow_metadata(priv, nfp_flow);
+
nfp_check_mask_remove(app, nfp_flow->mask_data,
nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
&new_mask_id);
- nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
- priv->flower_version++;
-
/* Update flow payload with mask ids. */
nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
- /* Release the stats ctx id. */
+ /* Release the stats ctx id and ctx to flow table entry. */
temp_ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+ ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &temp_ctx_id,
+ stats_ctx_table_params);
+ if (!ctx_entry)
+ return -ENOENT;
+
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table,
+ &ctx_entry->ht_node,
+ stats_ctx_table_params));
+ kfree(ctx_entry);
+
return nfp_release_stats_entry(app, temp_ctx_id);
}
+/* Map a stats context id back to the flow it was assigned to.
+ *
+ * Returns the flow, or NULL when @ctx_id has no entry in stats_ctx_table.
+ */
+struct nfp_fl_payload *
+nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_stats_ctx_to_flow *entry;
+
+	entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &ctx_id,
+				       stats_ctx_table_params);
+
+	return entry ? entry->flow : NULL;
+}
+
static int nfp_fl_obj_cmpfn(struct rhashtable_compare_arg *arg,
const void *obj)
{
if (err)
return err;
+ err = rhashtable_init(&priv->stats_ctx_table, &stats_ctx_table_params);
+ if (err)
+ goto err_free_flow_table;
+
get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
/* Init ring buffer and unallocated mask_ids. */
kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
if (!priv->mask_ids.mask_id_free_list.buf)
- goto err_free_flow_table;
+ goto err_free_stats_ctx_table;
priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
kfree(priv->mask_ids.last_used);
err_free_mask_id:
kfree(priv->mask_ids.mask_id_free_list.buf);
+err_free_stats_ctx_table:
+ rhashtable_destroy(&priv->stats_ctx_table);
err_free_flow_table:
rhashtable_destroy(&priv->flow_table);
return -ENOMEM;
rhashtable_free_and_destroy(&priv->flow_table,
nfp_check_rhashtable_empty, NULL);
+ rhashtable_free_and_destroy(&priv->stats_ctx_table,
+ nfp_check_rhashtable_empty, NULL);
kvfree(priv->stats);
kfree(priv->mask_ids.mask_id_free_list.buf);
kfree(priv->mask_ids.last_used);
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+#define NFP_FLOWER_MERGE_FIELDS \
+ (NFP_FLOWER_LAYER_PORT | \
+ NFP_FLOWER_LAYER_MAC | \
+ NFP_FLOWER_LAYER_TP | \
+ NFP_FLOWER_LAYER_IPV4 | \
+ NFP_FLOWER_LAYER_IPV6)
+
+struct nfp_flower_merge_check { /* per-field masks filled in by nfp_flower_update_merge_with_actions() */
+ union {
+ struct {
+ __be16 tci; /* VLAN TCI; set by push/pop VLAN actions */
+ struct nfp_flower_mac_mpls l2;
+ struct nfp_flower_tp_ports l4;
+ union {
+ struct nfp_flower_ipv4 ipv4;
+ struct nfp_flower_ipv6 ipv6;
+ };
+ };
+ unsigned long vals[8]; /* overlays the fields above as an array of words */
+ };
+};
+
static int
nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
u8 mtype)
break;
case cpu_to_be16(ETH_P_IPV6):
- key_layer |= NFP_FLOWER_LAYER_IPV6;
+ key_layer |= NFP_FLOWER_LAYER_IPV6;
key_size += sizeof(struct nfp_flower_ipv6);
break;
flow_pay->nfp_tun_ipv4_addr = 0;
flow_pay->meta.flags = 0;
+ INIT_LIST_HEAD(&flow_pay->linked_flows);
+ flow_pay->in_hw = false;
return flow_pay;
return NULL;
}
+static int
+nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow,
+ struct nfp_flower_merge_check *merge,
+ u8 *last_act_id, int *act_out)
+{
+ struct nfp_fl_set_ipv6_tc_hl_fl *ipv6_tc_hl_fl;
+ struct nfp_fl_set_ip4_ttl_tos *ipv4_ttl_tos;
+ struct nfp_fl_set_ip4_addrs *ipv4_add;
+ struct nfp_fl_set_ipv6_addr *ipv6_add;
+ struct nfp_fl_push_vlan *push_vlan;
+ struct nfp_fl_set_tport *tport;
+ struct nfp_fl_set_eth *eth;
+ struct nfp_fl_act_head *a;
+ unsigned int act_off = 0;
+ u8 act_id = 0;
+ u8 *ports;
+ int i;
+ /* Walk @flow's action list and fold the mask of every field an action
+ * writes into @merge. Output actions are counted in @act_out and the ID
+ * of the final action is reported through @last_act_id (both pointers
+ * are optional). An action ID not handled below fails the walk with
+ * -EOPNOTSUPP; otherwise 0 is returned.
+ */
+ while (act_off < flow->meta.act_len) {
+ a = (struct nfp_fl_act_head *)&flow->action_data[act_off];
+ act_id = a->jump_id;
+
+ switch (act_id) {
+ case NFP_FL_ACTION_OPCODE_OUTPUT:
+ if (act_out)
+ (*act_out)++;
+ break;
+ case NFP_FL_ACTION_OPCODE_PUSH_VLAN:
+ push_vlan = (struct nfp_fl_push_vlan *)a;
+ if (push_vlan->vlan_tci)
+ merge->tci = cpu_to_be16(0xffff);
+ break;
+ case NFP_FL_ACTION_OPCODE_POP_VLAN:
+ merge->tci = cpu_to_be16(0);
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL:
+ /* New tunnel header means l2 to l4 can be matched. */
+ eth_broadcast_addr(&merge->l2.mac_dst[0]);
+ eth_broadcast_addr(&merge->l2.mac_src[0]);
+ memset(&merge->l4, 0xff,
+ sizeof(struct nfp_flower_tp_ports));
+ memset(&merge->ipv4, 0xff,
+ sizeof(struct nfp_flower_ipv4));
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_ETHERNET:
+ eth = (struct nfp_fl_set_eth *)a;
+ for (i = 0; i < ETH_ALEN; i++) /* first ETH_ALEN mask bytes: destination */
+ merge->l2.mac_dst[i] |= eth->eth_addr_mask[i];
+ for (i = 0; i < ETH_ALEN; i++) /* next ETH_ALEN mask bytes: source */
+ merge->l2.mac_src[i] |=
+ eth->eth_addr_mask[ETH_ALEN + i];
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS:
+ ipv4_add = (struct nfp_fl_set_ip4_addrs *)a;
+ merge->ipv4.ipv4_src |= ipv4_add->ipv4_src_mask;
+ merge->ipv4.ipv4_dst |= ipv4_add->ipv4_dst_mask;
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS:
+ ipv4_ttl_tos = (struct nfp_fl_set_ip4_ttl_tos *)a;
+ merge->ipv4.ip_ext.ttl |= ipv4_ttl_tos->ipv4_ttl_mask;
+ merge->ipv4.ip_ext.tos |= ipv4_ttl_tos->ipv4_tos_mask;
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_IPV6_SRC:
+ ipv6_add = (struct nfp_fl_set_ipv6_addr *)a;
+ for (i = 0; i < 4; i++)
+ merge->ipv6.ipv6_src.in6_u.u6_addr32[i] |=
+ ipv6_add->ipv6[i].mask;
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_IPV6_DST:
+ ipv6_add = (struct nfp_fl_set_ipv6_addr *)a;
+ for (i = 0; i < 4; i++)
+ merge->ipv6.ipv6_dst.in6_u.u6_addr32[i] |=
+ ipv6_add->ipv6[i].mask;
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL:
+ ipv6_tc_hl_fl = (struct nfp_fl_set_ipv6_tc_hl_fl *)a;
+ merge->ipv6.ip_ext.ttl |=
+ ipv6_tc_hl_fl->ipv6_hop_limit_mask;
+ merge->ipv6.ip_ext.tos |= ipv6_tc_hl_fl->ipv6_tc_mask;
+ merge->ipv6.ipv6_flow_label_exthdr |=
+ ipv6_tc_hl_fl->ipv6_label_mask;
+ break;
+ case NFP_FL_ACTION_OPCODE_SET_UDP:
+ case NFP_FL_ACTION_OPCODE_SET_TCP:
+ tport = (struct nfp_fl_set_tport *)a;
+ ports = (u8 *)&merge->l4.port_src; /* 4 mask bytes are ORed in starting at port_src */
+ for (i = 0; i < 4; i++)
+ ports[i] |= tport->tp_port_mask[i];
+ break;
+ case NFP_FL_ACTION_OPCODE_PRE_TUNNEL:
+ case NFP_FL_ACTION_OPCODE_PRE_LAG:
+ case NFP_FL_ACTION_OPCODE_PUSH_GENEVE:
+ break; /* accepted, but leave @merge untouched */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ act_off += a->len_lw << NFP_FL_LW_SIZ; /* step to the next action header */
+ }
+
+ if (last_act_id)
+ *last_act_id = act_id;
+
+ return 0;
+}
+/* Fill @merge with the match masks of @flow.
+ *
+ * @merge is zeroed, then the TCI mask and whichever of the MAC, transport
+ * port and IPv4/IPv6 mask sections are flagged in the flow's key layer are
+ * copied out of @flow's mask_data. Unless @extra_fields is true, a flow
+ * whose key layer has bits outside NFP_FLOWER_MERGE_FIELDS is rejected.
+ *
+ * Return: 0 on success, -EOPNOTSUPP for an unsupported key layer.
+ */
+static int
+nfp_flower_populate_merge_match(struct nfp_fl_payload *flow,
+ struct nfp_flower_merge_check *merge,
+ bool extra_fields)
+{
+ struct nfp_flower_meta_tci *meta_tci;
+ u8 *mask = flow->mask_data;
+ u8 key_layer, match_size;
+
+ memset(merge, 0, sizeof(struct nfp_flower_merge_check));
+
+ meta_tci = (struct nfp_flower_meta_tci *)mask;
+ key_layer = meta_tci->nfp_flow_key_layer;
+
+ if (key_layer & ~NFP_FLOWER_MERGE_FIELDS && !extra_fields)
+ return -EOPNOTSUPP;
+
+ merge->tci = meta_tci->tci;
+ mask += sizeof(struct nfp_flower_meta_tci);
+
+ if (key_layer & NFP_FLOWER_LAYER_EXT_META)
+ mask += sizeof(struct nfp_flower_ext_meta);
+
+ mask += sizeof(struct nfp_flower_in_port); /* in_port section is skipped unconditionally */
+
+ if (key_layer & NFP_FLOWER_LAYER_MAC) {
+ match_size = sizeof(struct nfp_flower_mac_mpls);
+ memcpy(&merge->l2, mask, match_size);
+ mask += match_size;
+ }
+
+ if (key_layer & NFP_FLOWER_LAYER_TP) {
+ match_size = sizeof(struct nfp_flower_tp_ports);
+ memcpy(&merge->l4, mask, match_size);
+ mask += match_size;
+ }
+
+ if (key_layer & NFP_FLOWER_LAYER_IPV4) {
+ match_size = sizeof(struct nfp_flower_ipv4);
+ memcpy(&merge->ipv4, mask, match_size);
+ }
+
+ if (key_layer & NFP_FLOWER_LAYER_IPV6) {
+ match_size = sizeof(struct nfp_flower_ipv6);
+ memcpy(&merge->ipv6, mask, match_size);
+ }
+
+ return 0;
+}
+/* Check whether @sub_flow1 followed by @sub_flow2 may become one flow.
+ *
+ * Return: 0 if the merge is allowed; -EOPNOTSUPP if a flow uses an
+ * unsupported match layer or action, or if @sub_flow1 does not end in its
+ * one and only output action; -EINVAL if @sub_flow2 matches on bits that
+ * @sub_flow1 neither matches nor sets.
+ */
+static int
+nfp_flower_can_merge(struct nfp_fl_payload *sub_flow1,
+ struct nfp_fl_payload *sub_flow2)
+{
+ /* Two flows can be merged if sub_flow2 only matches on bits that are
+ * either matched by sub_flow1 or set by a sub_flow1 action. This
+ * ensures that every packet that hits sub_flow1 and recirculates is
+ * guaranteed to hit sub_flow2.
+ */
+ struct nfp_flower_merge_check sub_flow1_merge, sub_flow2_merge;
+ int err, act_out = 0;
+ u8 last_act_id = 0;
+
+ err = nfp_flower_populate_merge_match(sub_flow1, &sub_flow1_merge,
+ true);
+ if (err)
+ return err;
+
+ err = nfp_flower_populate_merge_match(sub_flow2, &sub_flow2_merge,
+ false);
+ if (err)
+ return err;
+
+ err = nfp_flower_update_merge_with_actions(sub_flow1, &sub_flow1_merge,
+ &last_act_id, &act_out);
+ if (err)
+ return err;
+
+ /* Must only be 1 output action and it must be the last in sequence. */
+ if (act_out != 1 || last_act_id != NFP_FL_ACTION_OPCODE_OUTPUT)
+ return -EOPNOTSUPP;
+
+ /* Reject merge if sub_flow2 matches on something that is not matched
+ * on or set in an action by sub_flow1.
+ */
+ err = bitmap_andnot(sub_flow2_merge.vals, sub_flow2_merge.vals,
+ sub_flow1_merge.vals,
+ sizeof(struct nfp_flower_merge_check) * 8);
+ if (err) /* non-zero here means bits were left over, not an errno */
+ return -EINVAL;
+
+ return 0;
+}
+/* Copy the leading pre-actions (PRE_TUNNEL, PRE_LAG) of @act_src to @act_dst.
+ *
+ * Copying stops at the first action of any other type, or once @len bytes
+ * of @act_src have been walked. If @tunnel_act is given it is set to true
+ * when a PRE_TUNNEL action is seen; it is never cleared here.
+ *
+ * Return: number of bytes of pre-actions copied.
+ */
+static unsigned int
+nfp_flower_copy_pre_actions(char *act_dst, char *act_src, int len,
+ bool *tunnel_act)
+{
+ unsigned int act_off = 0, act_len;
+ struct nfp_fl_act_head *a;
+ u8 act_id = 0;
+
+ while (act_off < len) {
+ a = (struct nfp_fl_act_head *)&act_src[act_off];
+ act_len = a->len_lw << NFP_FL_LW_SIZ;
+ act_id = a->jump_id;
+
+ switch (act_id) {
+ case NFP_FL_ACTION_OPCODE_PRE_TUNNEL:
+ if (tunnel_act)
+ *tunnel_act = true;
+ /* fall through */
+ case NFP_FL_ACTION_OPCODE_PRE_LAG:
+ memcpy(act_dst + act_off, act_src + act_off, act_len);
+ break;
+ default:
+ return act_off; /* first non-pre-action ends the copy */
+ }
+
+ act_off += act_len;
+ }
+
+ return act_off;
+}
+
+/* Check that the @len bytes of actions at @acts are all output actions.
+ *
+ * Return: 0 if so, -EOPNOTSUPP on the first action of any other type.
+ */
+static int nfp_fl_verify_post_tun_acts(char *acts, int len)
+{
+	struct nfp_fl_act_head *hdr;
+	unsigned int off;
+
+	for (off = 0; off < len; off += hdr->len_lw << NFP_FL_LW_SIZ) {
+		hdr = (struct nfp_fl_act_head *)&acts[off];
+		if (hdr->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT)
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+/* Build the action list of @merge_flow from @sub_flow1 and @sub_flow2.
+ *
+ * The result is the pre-actions of @sub_flow1, the pre-actions of
+ * @sub_flow2, the remaining actions of @sub_flow1 minus its final output,
+ * and then the remaining actions of @sub_flow2. The shortcut and act_len
+ * fields of @merge_flow's meta are set as well.
+ *
+ * Return: 0 on success; -EINVAL if @sub_flow2 has no actions or the merged
+ * list would exceed NFP_FL_MAX_A_SIZ; -EOPNOTSUPP if @sub_flow1 has a
+ * pre-tunnel action and @sub_flow2 has anything other than output actions
+ * after its pre-actions.
+ */
+static int
+nfp_flower_merge_action(struct nfp_fl_payload *sub_flow1,
+ struct nfp_fl_payload *sub_flow2,
+ struct nfp_fl_payload *merge_flow)
+{
+ unsigned int sub1_act_len, sub2_act_len, pre_off1, pre_off2;
+ bool tunnel_act = false;
+ char *merge_act;
+ int err;
+
+ /* The last action of sub_flow1 must be output - do not merge this. */
+ sub1_act_len = sub_flow1->meta.act_len - sizeof(struct nfp_fl_output);
+ sub2_act_len = sub_flow2->meta.act_len;
+
+ if (!sub2_act_len)
+ return -EINVAL;
+
+ if (sub1_act_len + sub2_act_len > NFP_FL_MAX_A_SIZ)
+ return -EINVAL;
+
+ /* A shortcut can only be applied if there is a single action. */
+ if (sub1_act_len)
+ merge_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+ else
+ merge_flow->meta.shortcut = sub_flow2->meta.shortcut;
+
+ merge_flow->meta.act_len = sub1_act_len + sub2_act_len;
+ merge_act = merge_flow->action_data;
+
+ /* Copy any pre-actions to the start of merge flow action list. */
+ pre_off1 = nfp_flower_copy_pre_actions(merge_act,
+ sub_flow1->action_data,
+ sub1_act_len, &tunnel_act);
+ merge_act += pre_off1;
+ sub1_act_len -= pre_off1; /* from here on: bytes left after the pre-actions */
+ pre_off2 = nfp_flower_copy_pre_actions(merge_act,
+ sub_flow2->action_data,
+ sub2_act_len, NULL);
+ merge_act += pre_off2;
+ sub2_act_len -= pre_off2;
+
+ /* FW does a tunnel push when egressing, therefore, if sub_flow 1 pushes
+ * a tunnel, sub_flow 2 can only have output actions for a valid merge.
+ */
+ if (tunnel_act) {
+ char *post_tun_acts = &sub_flow2->action_data[pre_off2];
+
+ err = nfp_fl_verify_post_tun_acts(post_tun_acts, sub2_act_len);
+ if (err)
+ return err;
+ }
+
+ /* Copy remaining actions from sub_flows 1 and 2. */
+ memcpy(merge_act, sub_flow1->action_data + pre_off1, sub1_act_len);
+ merge_act += sub1_act_len;
+ memcpy(merge_act, sub_flow2->action_data + pre_off2, sub2_act_len);
+
+ return 0;
+}
+
+/* Take @link off the lists of both flows it joins and free it.
+ * Flow link code should only be accessed under RTNL.
+ */
+static void nfp_flower_unlink_flow(struct nfp_fl_payload_link *link)
+{
+	list_del(&link->sub_flow.list);
+	list_del(&link->merge_flow.list);
+	kfree(link);
+}
+
+/* Remove the first link on @merge_flow's list that points at @sub_flow.
+ * Nothing happens if no such link exists.
+ */
+static void nfp_flower_unlink_flows(struct nfp_fl_payload *merge_flow,
+				    struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *entry;
+
+	list_for_each_entry(entry, &merge_flow->linked_flows, merge_flow.list) {
+		if (entry->sub_flow.flow != sub_flow)
+			continue;
+
+		/* The list is not walked again after the entry is freed. */
+		nfp_flower_unlink_flow(entry);
+		return;
+	}
+}
+
+/* Allocate a link joining @merge_flow and @sub_flow and append it to the
+ * linked_flows list of each.
+ *
+ * Return: 0 on success, -ENOMEM if the link cannot be allocated.
+ */
+static int nfp_flower_link_flows(struct nfp_fl_payload *merge_flow,
+				 struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *new_link;
+
+	new_link = kmalloc(sizeof(*new_link), GFP_KERNEL);
+	if (!new_link)
+		return -ENOMEM;
+
+	new_link->merge_flow.flow = merge_flow;
+	new_link->sub_flow.flow = sub_flow;
+
+	list_add_tail(&new_link->merge_flow.list, &merge_flow->linked_flows);
+	list_add_tail(&new_link->sub_flow.list, &sub_flow->linked_flows);
+
+	return 0;
+}
+
+/**
+ * nfp_flower_merge_offloaded_flows() - Merge 2 existing flows to single flow.
+ * @app: Pointer to the APP handle
+ * @sub_flow1: Initial flow matched to produce merge hint
+ * @sub_flow2: Post recirculation flow matched in merge hint
+ *
+ * Combines 2 flows (if valid) to a single flow, removing the initial from hw
+ * and offloading the new, merged flow.
+ *
+ * The merged flow copies the match key and mask of @sub_flow1, takes its
+ * actions from nfp_flower_merge_action() and is linked to both sub-flows.
+ * RTNL must be held. -EINVAL is returned if both arguments are the same flow
+ * or nfp_flower_is_merge_flow() is true for either of them. On any failure
+ * the steps already taken are undone and the merged flow is freed.
+ *
+ * Return: negative value on error, 0 on success.
+ */
+int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
+ struct nfp_fl_payload *sub_flow1,
+ struct nfp_fl_payload *sub_flow2)
+{
+ struct tc_cls_flower_offload merge_tc_off;
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_payload *merge_flow;
+ struct nfp_fl_key_ls merge_key_ls;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (sub_flow1 == sub_flow2 ||
+ nfp_flower_is_merge_flow(sub_flow1) ||
+ nfp_flower_is_merge_flow(sub_flow2))
+ return -EINVAL;
+
+ err = nfp_flower_can_merge(sub_flow1, sub_flow2);
+ if (err)
+ return err;
+
+ merge_key_ls.key_size = sub_flow1->meta.key_len; /* only key_size is filled in here */
+
+ merge_flow = nfp_flower_allocate_new(&merge_key_ls);
+ if (!merge_flow)
+ return -ENOMEM;
+
+ merge_flow->tc_flower_cookie = (unsigned long)merge_flow; /* own address serves as the cookie */
+ merge_flow->ingress_dev = sub_flow1->ingress_dev;
+
+ memcpy(merge_flow->unmasked_data, sub_flow1->unmasked_data,
+ sub_flow1->meta.key_len);
+ memcpy(merge_flow->mask_data, sub_flow1->mask_data,
+ sub_flow1->meta.mask_len);
+
+ err = nfp_flower_merge_action(sub_flow1, sub_flow2, merge_flow);
+ if (err)
+ goto err_destroy_merge_flow;
+
+ err = nfp_flower_link_flows(merge_flow, sub_flow1);
+ if (err)
+ goto err_destroy_merge_flow;
+
+ err = nfp_flower_link_flows(merge_flow, sub_flow2);
+ if (err)
+ goto err_unlink_sub_flow1;
+
+ merge_tc_off.cookie = merge_flow->tc_flower_cookie; /* only the cookie member is set */
+ err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow,
+ merge_flow->ingress_dev);
+ if (err)
+ goto err_unlink_sub_flow2;
+
+ err = rhashtable_insert_fast(&priv->flow_table, &merge_flow->fl_node,
+ nfp_flower_table_params);
+ if (err)
+ goto err_release_metadata;
+
+ err = nfp_flower_xmit_flow(app, merge_flow,
+ NFP_FLOWER_CMSG_TYPE_FLOW_MOD);
+ if (err)
+ goto err_remove_rhash;
+
+ merge_flow->in_hw = true;
+ sub_flow1->in_hw = false;
+
+ return 0;
+
+err_remove_rhash:
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+ &merge_flow->fl_node,
+ nfp_flower_table_params));
+err_release_metadata:
+ nfp_modify_flow_metadata(app, merge_flow);
+err_unlink_sub_flow2:
+ nfp_flower_unlink_flows(merge_flow, sub_flow2);
+err_unlink_sub_flow1:
+ nfp_flower_unlink_flows(merge_flow, sub_flow1);
+err_destroy_merge_flow:
+ kfree(merge_flow->action_data);
+ kfree(merge_flow->mask_data);
+ kfree(merge_flow->unmasked_data);
+ kfree(merge_flow);
+ return err;
+}
+
/**
* nfp_flower_add_offload() - Adds a new flow to hardware.
* @app: Pointer to the APP handle
if (port)
port->tc_offload_cnt++;
+ flow_pay->in_hw = true;
+
/* Deallocate flow payload when flower rule has been destroyed. */
kfree(key_layer);
return err;
}
+static void
+nfp_flower_remove_merge_flow(struct nfp_app *app,
+ struct nfp_fl_payload *del_sub_flow,
+ struct nfp_fl_payload *merge_flow)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_payload_link *link, *temp;
+ struct nfp_fl_payload *origin;
+ bool mod = false;
+ int err;
+ /* Tear down @merge_flow because @del_sub_flow is going away. The first
+ * link on a merge flow's list is the one added first, which is
+ * sub_flow1 in nfp_flower_merge_offloaded_flows(). The merge flow's
+ * links, buffers and table entry are released even when a step fails.
+ */
+ link = list_first_entry(&merge_flow->linked_flows,
+ struct nfp_fl_payload_link, merge_flow.list);
+ origin = link->sub_flow.flow;
+
+ /* Re-add rule the merge had overwritten if it has not been deleted. */
+ if (origin != del_sub_flow)
+ mod = true;
+
+ err = nfp_modify_flow_metadata(app, merge_flow);
+ if (err) {
+ nfp_flower_cmsg_warn(app, "Metadata fail for merge flow delete.\n");
+ goto err_free_links;
+ }
+
+ if (!mod) {
+ err = nfp_flower_xmit_flow(app, merge_flow,
+ NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+ if (err) {
+ nfp_flower_cmsg_warn(app, "Failed to delete merged flow.\n");
+ goto err_free_links;
+ }
+ } else {
+ __nfp_modify_flow_metadata(priv, origin);
+ err = nfp_flower_xmit_flow(app, origin,
+ NFP_FLOWER_CMSG_TYPE_FLOW_MOD);
+ if (err)
+ nfp_flower_cmsg_warn(app, "Failed to revert merge flow.\n");
+ origin->in_hw = true; /* set even if the revert message failed */
+ }
+
+err_free_links: /* also reached on the success path */
+ /* Clean any links connected with the merged flow. */
+ list_for_each_entry_safe(link, temp, &merge_flow->linked_flows,
+ merge_flow.list)
+ nfp_flower_unlink_flow(link);
+
+ kfree(merge_flow->action_data);
+ kfree(merge_flow->mask_data);
+ kfree(merge_flow->unmasked_data);
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+ &merge_flow->fl_node,
+ nfp_flower_table_params));
+ kfree_rcu(merge_flow, rcu);
+}
+
+/* Remove every merge flow that @sub_flow is linked to. */
+static void
+nfp_flower_del_linked_merge_flows(struct nfp_app *app,
+				  struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *cur, *next;
+
+	/* Safe iteration: removing a merge flow frees the link being visited. */
+	list_for_each_entry_safe(cur, next, &sub_flow->linked_flows,
+				 sub_flow.list) {
+		struct nfp_fl_payload *merge_flow = cur->merge_flow.flow;
+
+		nfp_flower_remove_merge_flow(app, sub_flow, merge_flow);
+	}
+}
+
/**
* nfp_flower_del_offload() - Removes a flow from hardware.
* @app: Pointer to the APP handle
* @flow: TC flower classifier offload structure
*
* Removes a flow from the repeated hash structure and clears the
- * action payload.
+ * action payload. Any flows merged from this are also deleted.
*
* Return: negative value on error, 0 if removed successfully.
*/
err = nfp_modify_flow_metadata(app, nfp_flow);
if (err)
- goto err_free_flow;
+ goto err_free_merge_flow;
if (nfp_flow->nfp_tun_ipv4_addr)
nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
+ if (!nfp_flow->in_hw) {
+ err = 0;
+ goto err_free_merge_flow;
+ }
+
err = nfp_flower_xmit_flow(app, nfp_flow,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
- if (err)
- goto err_free_flow;
+ /* Fall through on error. */
-err_free_flow:
+err_free_merge_flow:
+ nfp_flower_del_linked_merge_flows(app, nfp_flow);
if (port)
port->tc_offload_cnt--;
kfree(nfp_flow->action_data);
return err;
}
+/* Hand the stats accumulated for @merge_flow to the sub-flows that form it.
+ *
+ * The packet and byte counts stored for @merge_flow are added to every linked
+ * sub-flow and then cleared on the merge flow; each sub-flow's last used time
+ * is moved forward if the merge flow was used more recently. Nothing is done
+ * when the merge flow has no packets recorded.
+ *
+ * The call chain visible from nfp_flower_get_stats() reaches this function
+ * with priv->stats_lock held.
+ */
+static void
+__nfp_flower_update_merge_stats(struct nfp_app *app,
+				struct nfp_fl_payload *merge_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload_link *link;
+	struct nfp_fl_payload *sub_flow;
+	u64 pkts, bytes, used;
+	u32 ctx_id;
+
+	ctx_id = be32_to_cpu(merge_flow->meta.host_ctx_id);
+	pkts = priv->stats[ctx_id].pkts;
+	/* Do not cycle subflows if no stats to distribute. */
+	if (!pkts)
+		return;
+	bytes = priv->stats[ctx_id].bytes;
+	used = priv->stats[ctx_id].used;
+
+	/* Reset stats for the merge flow. */
+	priv->stats[ctx_id].pkts = 0;
+	priv->stats[ctx_id].bytes = 0;
+
+	/* The merge flow has received stats updates from firmware.
+	 * Distribute these stats to all subflows that form the merge.
+	 * The stats will be collected from TC via the subflows.
+	 */
+	list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list) {
+		sub_flow = link->sub_flow.flow;
+		ctx_id = be32_to_cpu(sub_flow->meta.host_ctx_id);
+		priv->stats[ctx_id].pkts += pkts;
+		priv->stats[ctx_id].bytes += bytes;
+		/* max_t() only yields a value; store it so the sub-flow's
+		 * last used time actually advances.
+		 */
+		priv->stats[ctx_id].used = max_t(u64,
+						 priv->stats[ctx_id].used,
+						 used);
+	}
+}
+
+/* Pull in the stats of every merge flow that @sub_flow is part of. */
+static void
+nfp_flower_update_merge_stats(struct nfp_app *app,
+			      struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *entry;
+
+	/* Each link on the sub-flow's list leads to one merge flow. */
+	list_for_each_entry(entry, &sub_flow->linked_flows, sub_flow.list) {
+		struct nfp_fl_payload *merge_flow = entry->merge_flow.flow;
+
+		__nfp_flower_update_merge_stats(app, merge_flow);
+	}
+}
+
/**
* nfp_flower_get_stats() - Populates flow stats obtained from hardware.
* @app: Pointer to the APP handle
ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
spin_lock_bh(&priv->stats_lock);
+ /* If request is for a sub_flow, update stats from merged flows. */
+ if (!list_empty(&nfp_flow->linked_flows))
+ nfp_flower_update_merge_stats(app, nfp_flow);
+
flow_stats_update(&flow->stats, priv->stats[ctx_id].bytes,
priv->stats[ctx_id].pkts, priv->stats[ctx_id].used);
struct nfp_flower_priv *priv = app->priv;
int err;
- if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ !(f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS &&
+ nfp_flower_internal_port_can_offload(app, netdev)))
return -EOPNOTSUPP;
switch (f->command) {
for (i = 0; i < count; i++) {
ipv4_addr = payload->tun_info[i].ipv4;
port = be32_to_cpu(payload->tun_info[i].egress_port);
- netdev = nfp_app_repr_get(app, port);
+ netdev = nfp_app_dev_get(app, port, NULL);
if (!netdev)
continue;
struct flowi4 *flow, struct neighbour *neigh, gfp_t flag)
{
struct nfp_tun_neigh payload;
+ u32 port_id;
- /* Only offload representor IPv4s for now. */
- if (!nfp_netdev_is_nfp_repr(netdev))
+ port_id = nfp_flower_get_port_id_from_netdev(app, netdev);
+ if (!port_id)
return;
memset(&payload, 0, sizeof(struct nfp_tun_neigh));
payload.src_ipv4 = flow->saddr;
ether_addr_copy(payload.src_addr, netdev->dev_addr);
neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
- payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev));
+ payload.port_id = cpu_to_be32(port_id);
/* Add destination of new route to NFP cache. */
nfp_tun_add_route_to_cache(app, payload.dst_ipv4);
payload = nfp_flower_cmsg_get_data(skb);
- netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port));
+ netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL);
if (!netdev)
goto route_fail_warning;
* @eswitch_mode_set: set SR-IOV eswitch mode (under pf->lock)
* @sriov_enable: app-specific sriov initialisation
* @sriov_disable: app-specific sriov clean-up
- * @repr_get: get representor netdev
+ * @dev_get: get representor or internal port representing netdev
*/
struct nfp_app_type {
enum nfp_app_id id;
enum devlink_eswitch_mode (*eswitch_mode_get)(struct nfp_app *app);
int (*eswitch_mode_set)(struct nfp_app *app, u16 mode);
- struct net_device *(*repr_get)(struct nfp_app *app, u32 id);
+ struct net_device *(*dev_get)(struct nfp_app *app, u32 id,
+ bool *redir_egress);
};
/**
app->type->sriov_disable(app);
}
-static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id)
+static inline
+struct net_device *nfp_app_dev_get(struct nfp_app *app, u32 id,
+ bool *redir_egress)
{
- if (unlikely(!app || !app->type->repr_get))
+ if (unlikely(!app || !app->type->dev_get))
return NULL;
- return app->type->repr_get(app, id);
+ return app->type->dev_get(app, id, redir_egress);
}
struct nfp_app *nfp_app_from_netdev(struct net_device *netdev);
static int
nfp_devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
u16 pool_index,
- u32 size, enum devlink_sb_threshold_type threshold_type)
+ u32 size, enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
{
struct nfp_pf *pf = devlink_priv(devlink);
static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
+ if (!pci_get_drvdata(pdev))
+ return -ENOENT;
+
if (num_vfs == 0)
return nfp_pcie_sriov_disable(pdev);
else
return err;
}
-static void nfp_pci_remove(struct pci_dev *pdev)
+static void __nfp_pci_shutdown(struct pci_dev *pdev, bool unload_fw)
{
- struct nfp_pf *pf = pci_get_drvdata(pdev);
+ struct nfp_pf *pf;
+
+ pf = pci_get_drvdata(pdev);
+ if (!pf)
+ return;
nfp_hwmon_unregister(pf);
vfree(pf->dumpspec);
kfree(pf->rtbl);
nfp_mip_close(pf->mip);
- if (pf->fw_loaded)
+ if (unload_fw && pf->fw_loaded)
nfp_fw_unload(pf);
destroy_workqueue(pf->wq);
pci_disable_device(pdev);
}
+static void nfp_pci_remove(struct pci_dev *pdev)
+{
+ __nfp_pci_shutdown(pdev, true); /* true: firmware is unloaded too when pf->fw_loaded is set */
+}
+
+static void nfp_pci_shutdown(struct pci_dev *pdev)
+{
+ __nfp_pci_shutdown(pdev, false); /* false: same teardown as remove, but firmware is left loaded */
+}
+
static struct pci_driver nfp_pci_driver = {
.name = nfp_driver_name,
.id_table = nfp_pci_device_ids,
.probe = nfp_pci_probe,
.remove = nfp_pci_remove,
+ .shutdown = nfp_pci_shutdown,
.sriov_configure = nfp_pcie_sriov_configure,
};
* @shared_handler: Handler for shared interrupts
* @shared_name: Name for shared interrupt
* @me_freq_mhz: ME clock_freq (MHz)
- * @reconfig_lock: Protects HW reconfiguration request regs/machinery
+ * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active,
+ * @reconfig_sync_present and HW reconfiguration request
+ * regs/machinery from async requests (sync must take
+ * @bar_lock)
* @reconfig_posted: Pending reconfig bits coming from async sources
* @reconfig_timer_active: Timer for reading reconfiguration results is pending
* @reconfig_sync_present: Some thread is performing synchronous reconfig
* @reconfig_timer: Timer for async reading of reconfig results
* @reconfig_in_progress_update: Update FW is processing now (debug only)
+ * @bar_lock: vNIC config BAR access lock, protects: update,
+ * mailbox area
* @link_up: Is the link up?
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading
* @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter
struct timer_list reconfig_timer;
u32 reconfig_in_progress_update;
+ struct mutex bar_lock;
+
u32 rx_coalesce_usecs;
u32 rx_coalesce_max_frames;
u32 tx_coalesce_usecs;
spin_unlock_bh(&nn->r_vecs[0].lock);
}
+static inline void nn_ctrl_bar_lock(struct nfp_net *nn)
+{
+ mutex_lock(&nn->bar_lock); /* bar_lock is a mutex, so this may sleep */
+}
+
+static inline void nn_ctrl_bar_unlock(struct nfp_net *nn)
+{
+ mutex_unlock(&nn->bar_lock); /* releases the lock taken by nn_ctrl_bar_lock() */
+}
+
/* Globals */
extern const char nfp_driver_version[];
void nfp_net_rss_write_itbl(struct nfp_net *nn);
void nfp_net_rss_write_key(struct nfp_net *nn);
void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
-int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd);
+int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size);
+int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd);
+int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd);
unsigned int
nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/overflow.h>
#include <linux/page_ref.h>
return false;
}
-static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
{
bool timed_out = false;
+ int i;
+
+ /* Poll update field, waiting for NFP to ack the config.
+ * Do an opportunistic wait-busy loop, afterward sleep.
+ */
+ for (i = 0; i < 50; i++) {
+ if (nfp_net_reconfig_check_done(nn, false))
+ return false;
+ udelay(4);
+ }
- /* Poll update field, waiting for NFP to ack the config */
while (!nfp_net_reconfig_check_done(nn, timed_out)) {
- msleep(1);
+ usleep_range(250, 500);
timed_out = time_is_before_eq_jiffies(deadline);
}
+ return timed_out;
+}
+
+static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+{
+ if (__nfp_net_reconfig_wait(nn, deadline))
+ return -EIO;
+
if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
return -EIO;
- return timed_out ? -EIO : 0;
+ return 0;
}
static void nfp_net_reconfig_timer(struct timer_list *t)
}
/**
- * nfp_net_reconfig() - Reconfigure the firmware
+ * __nfp_net_reconfig() - Reconfigure the firmware
* @nn: NFP Net device to reconfigure
* @update: The value for the update field in the BAR config
*
*
* Return: Negative errno on error, 0 on success
*/
-int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+static int __nfp_net_reconfig(struct nfp_net *nn, u32 update)
{
int ret;
+ lockdep_assert_held(&nn->bar_lock);
+
nfp_net_reconfig_sync_enter(nn);
nfp_net_reconfig_start(nn, update);
return ret;
}
+/**
+ * nfp_net_reconfig() - Reconfigure the firmware
+ * @nn:      NFP Net device to reconfigure
+ * @update:  The value for the update field in the BAR config
+ *
+ * Runs __nfp_net_reconfig() with the control BAR lock held.
+ *
+ * Return: value returned by __nfp_net_reconfig()
+ */
+int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+{
+	int err;
+
+	nn_ctrl_bar_lock(nn);
+	err = __nfp_net_reconfig(nn, update);
+	nn_ctrl_bar_unlock(nn);
+
+	return err;
+}
+
+/**
+ * nfp_net_mbox_lock() - Check mailbox size and take the control BAR lock
+ * @nn:        NFP Net device
+ * @data_size: Number of bytes the caller needs in the mailbox value area
+ *
+ * The lock is taken only when the mailbox is large enough; on error the
+ * caller holds nothing.
+ *
+ * Return: 0 with the lock held, -EIO if the mailbox is too small
+ */
+int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size)
+{
+	bool fits;
+
+	fits = nn->tlv_caps.mbox_len >= NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size;
+	if (fits) {
+		nn_ctrl_bar_lock(nn);
+		return 0;
+	}
+
+	nn_err(nn, "mailbox too small for %u of data (%u)\n",
+	       data_size, nn->tlv_caps.mbox_len);
+	return -EIO;
+}
+
/**
- * nfp_net_reconfig_mbox() - Reconfigure the firmware via the mailbox
+ * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox
* @nn: NFP Net device to reconfigure
* @mbox_cmd: The value for the mailbox command
*
*
* Return: Negative errno on error, 0 on success
*/
-int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
+int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd)
{
u32 mbox = nn->tlv_caps.mbox_off;
int ret;
- if (!nfp_net_has_mbox(&nn->tlv_caps)) {
- nn_err(nn, "no mailbox present, command: %u\n", mbox_cmd);
- return -EIO;
- }
-
+ lockdep_assert_held(&nn->bar_lock);
nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
- ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
+ ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
if (ret) {
nn_err(nn, "Mailbox update error\n");
return ret;
return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
}
+/**
+ * nfp_net_mbox_reconfig_and_unlock() - Mailbox reconfig, then drop BAR lock
+ * @nn:       NFP Net device to reconfigure
+ * @mbox_cmd: The value for the mailbox command
+ *
+ * The control BAR lock is released whether or not the reconfig succeeds.
+ *
+ * Return: value returned by nfp_net_mbox_reconfig()
+ */
+int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
+{
+	int err = nfp_net_mbox_reconfig(nn, mbox_cmd);
+
+	nn_ctrl_bar_unlock(nn);
+
+	return err;
+}
+
/* Interrupt configuration and handling
*/
struct nfp_net_rx_buf *rxbuf;
struct nfp_net_rx_desc *rxd;
struct nfp_meta_parsed meta;
+ bool redir_egress = false;
struct net_device *netdev;
dma_addr_t new_dma_addr;
u32 meta_len_xdp = 0;
struct nfp_net *nn;
nn = netdev_priv(dp->netdev);
- netdev = nfp_app_repr_get(nn->app, meta.portid);
+ netdev = nfp_app_dev_get(nn->app, meta.portid,
+ &redir_egress);
if (unlikely(!netdev)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
continue;
}
- nfp_repr_inc_rx_stats(netdev, pkt_len);
+
+ if (nfp_netdev_is_nfp_repr(netdev))
+ nfp_repr_inc_rx_stats(netdev, pkt_len);
}
skb = build_skb(rxbuf->frag, true_bufsz);
if (meta_len_xdp)
skb_metadata_set(skb, meta_len_xdp);
- napi_gro_receive(&rx_ring->r_vec->napi, skb);
+ if (likely(!redir_egress)) {
+ napi_gro_receive(&rx_ring->r_vec->napi, skb);
+ } else {
+ skb->dev = netdev;
+ __skb_push(skb, ETH_HLEN);
+ dev_queue_xmit(skb);
+ }
}
if (xdp_prog) {
static int
nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
+ const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD;
struct nfp_net *nn = netdev_priv(netdev);
+ int err;
/* Priority tagged packets with vlan id 0 are processed by the
* NFP as untagged packets
if (!vid)
return 0;
+ err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
+ if (err)
+ return err;
+
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
ETH_P_8021Q);
- return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD);
+ return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
}
static int
nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
+ const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL;
struct nfp_net *nn = netdev_priv(netdev);
+ int err;
/* Priority tagged packets with vlan id 0 are processed by the
* NFP as untagged packets
if (!vid)
return 0;
+ err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
+ if (err)
+ return err;
+
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
ETH_P_8021Q);
- return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
+ return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
}
static void nfp_net_stat64(struct net_device *netdev,
.ndo_set_vf_mac = nfp_app_set_vf_mac,
.ndo_set_vf_vlan = nfp_app_set_vf_vlan,
.ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
+ .ndo_set_vf_trust = nfp_app_set_vf_trust,
.ndo_get_vf_config = nfp_app_get_vf_config,
.ndo_set_vf_link_state = nfp_app_set_vf_link_state,
.ndo_setup_tc = nfp_port_setup_tc,
nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
+ mutex_init(&nn->bar_lock);
+
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->link_status_lock);
void nfp_net_free(struct nfp_net *nn)
{
WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
+
+ mutex_destroy(&nn->bar_lock);
+
if (nn->dp.netdev)
free_netdev(nn->dp.netdev);
else
nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
}
- if (nn->dp.netdev)
- nfp_net_netdev_init(nn);
-
/* Stash the re-configuration queue away. First odd queue in TX Bar */
nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
if (err)
return err;
+ if (nn->dp.netdev)
+ nfp_net_netdev_init(nn);
+
nfp_net_vecs_init(nn);
if (!nn->dp.netdev)
#define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0
#define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4
#define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8
-#define NFP_NET_CFG_MBOX_SIMPLE_LEN 12
#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1
#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2
int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
struct nfp_net_tlv_caps *caps);
-
-static inline bool nfp_net_has_mbox(struct nfp_net_tlv_caps *caps)
-{
- return caps->mbox_len >= NFP_NET_CFG_MBOX_SIMPLE_LEN;
-}
-
#endif /* _NFP_NET_CTRL_H_ */
.ndo_set_vf_mac = nfp_app_set_vf_mac,
.ndo_set_vf_vlan = nfp_app_set_vf_vlan,
.ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
+ .ndo_set_vf_trust = nfp_app_set_vf_trust,
.ndo_get_vf_config = nfp_app_get_vf_config,
.ndo_set_vf_link_state = nfp_app_set_vf_link_state,
.ndo_fix_features = nfp_repr_fix_features,
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-/* Copyright (C) 2017 Netronome Systems, Inc. */
+/* Copyright (C) 2017-2019 Netronome Systems, Inc. */
#include <linux/bitfield.h>
#include <linux/errno.h>
"spoofchk");
}
+/* Set or clear the trust control bit of VF @vf and push the update through
+ * nfp_net_sriov_update().
+ *
+ * Return: error from nfp_net_sriov_check() if it fails, otherwise the result
+ * of nfp_net_sriov_update().
+ */
+int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool enable)
+{
+	struct nfp_app *app = nfp_app_from_netdev(netdev);
+	unsigned int ctrl_off;
+	u8 ctrl;
+	int ret;
+
+	ret = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_TRUST,
+				  "trust");
+	if (ret)
+		return ret;
+
+	/* Read-modify-write the control byte of the VF's entry in the VF
+	 * config symbol so only the trust bit changes.
+	 */
+	ctrl_off = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ +
+		   NFP_NET_VF_CFG_CTRL;
+	ctrl = readb(app->pf->vfcfg_tbl2 + ctrl_off);
+	ctrl = (ctrl & ~NFP_NET_VF_CFG_CTRL_TRUST) |
+	       FIELD_PREP(NFP_NET_VF_CFG_CTRL_TRUST, enable);
+	writeb(ctrl, app->pf->vfcfg_tbl2 + ctrl_off);
+
+	return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_TRUST,
+				    "trust");
+}
+
int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
int link_state)
{
ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tci);
ivi->spoofchk = FIELD_GET(NFP_NET_VF_CFG_CTRL_SPOOF, flags);
+ ivi->trusted = FIELD_GET(NFP_NET_VF_CFG_CTRL_TRUST, flags);
ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags);
return 0;
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
-/* Copyright (C) 2017 Netronome Systems, Inc. */
+/* Copyright (C) 2017-2019 Netronome Systems, Inc. */
#ifndef _NFP_NET_SRIOV_H_
#define _NFP_NET_SRIOV_H_
#define NFP_NET_VF_CFG_MB_CAP_VLAN (0x1 << 1)
#define NFP_NET_VF_CFG_MB_CAP_SPOOF (0x1 << 2)
#define NFP_NET_VF_CFG_MB_CAP_LINK_STATE (0x1 << 3)
+#define NFP_NET_VF_CFG_MB_CAP_TRUST (0x1 << 4)
#define NFP_NET_VF_CFG_MB_RET 0x2
#define NFP_NET_VF_CFG_MB_UPD 0x4
#define NFP_NET_VF_CFG_MB_UPD_MAC (0x1 << 0)
#define NFP_NET_VF_CFG_MB_UPD_VLAN (0x1 << 1)
#define NFP_NET_VF_CFG_MB_UPD_SPOOF (0x1 << 2)
#define NFP_NET_VF_CFG_MB_UPD_LINK_STATE (0x1 << 3)
+#define NFP_NET_VF_CFG_MB_UPD_TRUST (0x1 << 4)
#define NFP_NET_VF_CFG_MB_VF_NUM 0x7
/* VF config entry
#define NFP_NET_VF_CFG_MAC_HI 0x0
#define NFP_NET_VF_CFG_MAC_LO 0x6
#define NFP_NET_VF_CFG_CTRL 0x4
+#define NFP_NET_VF_CFG_CTRL_TRUST 0x8
#define NFP_NET_VF_CFG_CTRL_SPOOF 0x4
#define NFP_NET_VF_CFG_CTRL_LINK_STATE 0x3
#define NFP_NET_VF_CFG_LS_MODE_AUTO 0
int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto);
int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool setting);
int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
int link_state);
int nfp_app_get_vf_config(struct net_device *netdev, int vf,
static void nfp_netvf_pci_remove(struct pci_dev *pdev)
{
- struct nfp_net_vf *vf = pci_get_drvdata(pdev);
- struct nfp_net *nn = vf->nn;
+ struct nfp_net_vf *vf;
+ struct nfp_net *nn;
+
+ vf = pci_get_drvdata(pdev);
+ if (!vf)
+ return;
+
+ nn = vf->nn;
/* Note, the order is slightly different from above as we need
* to keep the nn pointer around till we have freed everything.
.id_table = nfp_netvf_pci_device_ids,
.probe = nfp_netvf_pci_probe,
.remove = nfp_netvf_pci_remove,
+ .shutdown = nfp_netvf_pci_remove,
};
u8 num_pf_rls;
};
+#define QED_OVERFLOW_BIT 1
+
struct qed_db_recovery_info {
struct list_head list;
/* Lock to protect the doorbell recovery mechanism list */
spinlock_t lock;
+ bool dorq_attn;
u32 db_recovery_counter;
+ unsigned long overflow;
};
struct storm_stats {
/* doorbell recovery mechanism */
void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
-void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
- enum qed_db_rec_exec db_exec);
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
/* Other Linux specific common definitions */
/* Doorbell address sanity (address within doorbell bar range) */
static bool qed_db_rec_sanity(struct qed_dev *cdev,
- void __iomem *db_addr, void *db_data)
+ void __iomem *db_addr,
+ enum qed_db_rec_width db_width,
+ void *db_data)
{
+ u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64;
+
/* Make sure doorbell address is within the doorbell bar */
if (db_addr < cdev->doorbells ||
- (u8 __iomem *)db_addr >
+ (u8 __iomem *)db_addr + width >
(u8 __iomem *)cdev->doorbells + cdev->db_size) {
WARN(true,
"Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n",
}
/* Sanitize doorbell address */
- if (!qed_db_rec_sanity(cdev, db_addr, db_data))
+ if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data))
return -EINVAL;
/* Obtain hwfn from doorbell address */
return 0;
}
- /* Sanitize doorbell address */
- if (!qed_db_rec_sanity(cdev, db_addr, db_data))
- return -EINVAL;
-
/* Obtain hwfn from doorbell address */
p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr);
/* Ring the doorbell of a single doorbell recovery entry */
static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
- struct qed_db_recovery_entry *db_entry,
- enum qed_db_rec_exec db_exec)
-{
- if (db_exec != DB_REC_ONCE) {
- /* Print according to width */
- if (db_entry->db_width == DB_REC_WIDTH_32B) {
- DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
- "%s doorbell address %p data %x\n",
- db_exec == DB_REC_DRY_RUN ?
- "would have rung" : "ringing",
- db_entry->db_addr,
- *(u32 *)db_entry->db_data);
- } else {
- DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
- "%s doorbell address %p data %llx\n",
- db_exec == DB_REC_DRY_RUN ?
- "would have rung" : "ringing",
- db_entry->db_addr,
- *(u64 *)(db_entry->db_data));
- }
+ struct qed_db_recovery_entry *db_entry)
+{
+ /* Print according to width */
+ if (db_entry->db_width == DB_REC_WIDTH_32B) {
+ DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+ "ringing doorbell address %p data %x\n",
+ db_entry->db_addr,
+ *(u32 *)db_entry->db_data);
+ } else {
+ DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+ "ringing doorbell address %p data %llx\n",
+ db_entry->db_addr,
+ *(u64 *)(db_entry->db_data));
}
/* Sanity */
if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr,
- db_entry->db_data))
+ db_entry->db_width, db_entry->db_data))
return;
/* Flush the write combined buffer. Since there are multiple doorbelling
wmb();
/* Ring the doorbell */
- if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) {
- if (db_entry->db_width == DB_REC_WIDTH_32B)
- DIRECT_REG_WR(db_entry->db_addr,
- *(u32 *)(db_entry->db_data));
- else
- DIRECT_REG_WR64(db_entry->db_addr,
- *(u64 *)(db_entry->db_data));
- }
+ if (db_entry->db_width == DB_REC_WIDTH_32B)
+ DIRECT_REG_WR(db_entry->db_addr,
+ *(u32 *)(db_entry->db_data));
+ else
+ DIRECT_REG_WR64(db_entry->db_addr,
+ *(u64 *)(db_entry->db_data));
/* Flush the write combined buffer. Next doorbell may come from a
* different entity to the same address...
}
/* Traverse the doorbell recovery entry list and ring all the doorbells */
-void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
- enum qed_db_rec_exec db_exec)
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn)
{
struct qed_db_recovery_entry *db_entry = NULL;
- if (db_exec != DB_REC_ONCE) {
- DP_NOTICE(p_hwfn,
- "Executing doorbell recovery. Counter was %d\n",
- p_hwfn->db_recovery_info.db_recovery_counter);
+ DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n",
+ p_hwfn->db_recovery_info.db_recovery_counter);
- /* Track amount of times recovery was executed */
- p_hwfn->db_recovery_info.db_recovery_counter++;
- }
+ /* Track amount of times recovery was executed */
+ p_hwfn->db_recovery_info.db_recovery_counter++;
/* Protect the list */
spin_lock_bh(&p_hwfn->db_recovery_info.lock);
list_for_each_entry(db_entry,
- &p_hwfn->db_recovery_info.list, list_entry) {
- qed_db_recovery_ring(p_hwfn, db_entry, db_exec);
- if (db_exec == DB_REC_ONCE)
- break;
- }
-
+ &p_hwfn->db_recovery_info.list, list_entry)
+ qed_db_recovery_ring(p_hwfn, db_entry);
spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
}
u32 count = QED_DB_REC_COUNT;
u32 usage = 1;
+ /* Flush any pending (e)dpms as they may never arrive */
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
+
/* wait for usage to zero or count to run out. This is necessary since
* EDPM doorbell transactions can take multiple 64b cycles, and as such
* can "split" over the pci. Possibly, the doorbell drop can happen with
int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 overflow;
+ u32 attn_ovfl, cur_ovfl;
int rc;
- overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
- DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow);
- if (!overflow) {
- qed_db_recovery_execute(p_hwfn, DB_REC_ONCE);
+ attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT,
+ &p_hwfn->db_recovery_info.overflow);
+ cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+ if (!cur_ovfl && !attn_ovfl)
return 0;
- }
- if (qed_edpm_enabled(p_hwfn)) {
+ DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n",
+ attn_ovfl, cur_ovfl);
+
+ if (cur_ovfl && !p_hwfn->db_bar_no_edpm) {
rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
if (rc)
return rc;
}
- /* Flush any pending (e)dpm as they may never arrive */
- qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
-
/* Release overflow sticky indication (stop silently dropping everything) */
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
/* Repeat all last doorbells (doorbell drop recovery) */
- qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
+ qed_db_recovery_execute(p_hwfn);
return 0;
}
-static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
+static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn)
{
- u32 int_sts, first_drop_reason, details, address, all_drops_reason;
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+ u32 overflow;
int rc;
- int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
- DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
+ overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+ if (!overflow)
+ goto out;
+
+ /* Run PF doorbell recovery in next periodic handler */
+ set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow);
+
+ if (!p_hwfn->db_bar_no_edpm) {
+ rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
+ if (rc)
+ goto out;
+ }
+
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+out:
+ /* Schedule the handler even if overflow was not detected */
+ qed_periodic_db_rec_start(p_hwfn);
+}
+
+static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
+{
+ u32 int_sts, first_drop_reason, details, address, all_drops_reason;
+ struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
/* int_sts may be zero since all PFs were interrupted for doorbell
* overflow but another one already handled it. Can abort here. If
* This PF also requires overflow recovery we will be interrupted again.
* The masked almost full indication may also be set. Ignoring.
*/
+ int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
return 0;
+ DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
+
/* check if db_drop or overflow happened */
if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
first_drop_reason, all_drops_reason);
- rc = qed_db_rec_handler(p_hwfn, p_ptt);
- qed_periodic_db_rec_start(p_hwfn);
- if (rc)
- return rc;
-
/* Clear the doorbell drop details and prepare for next drop */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
return -EINVAL;
}
+static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
+{
+ p_hwfn->db_recovery_info.dorq_attn = true; /* tested by qed_dorq_attn_handler() */
+ qed_dorq_attn_overflow(p_hwfn);
+ /* qed_dorq_attn_overflow() returns nothing, so the value handed back
+ * to the caller is the one produced by qed_dorq_attn_int_sts().
+ */
+ return qed_dorq_attn_int_sts(p_hwfn);
+}
+
+static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn)
+{
+ if (p_hwfn->db_recovery_info.dorq_attn) /* callback already ran */
+ goto out;
+
+ /* Call DORQ callback if the attention was missed, i.e. the dorq_attn
+ * flag that qed_dorq_attn_cb() sets on entry is still clear. The
+ * callback's return value is not used here.
+ */
+ qed_dorq_attn_cb(p_hwfn);
+out:
+ p_hwfn->db_recovery_info.dorq_attn = false; /* flag is always clear on exit */
+}
+
/* Instead of major changes to the data-structure, we have a some 'special'
* identifiers for sources that changed meaning between adapters.
*/
}
}
+ /* Handle missed DORQ attention */
+ qed_dorq_attn_handler(p_hwfn);
+
/* Clear IGU indication for the deasserted bits */
DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview +
GTT_BAR0_MAP_REG_IGU_CMD +
/**
* @brief - Doorbell Recovery handler.
- * Run DB_REAL_DEAL doorbell recovery in case of PF overflow
- * (and flush DORQ if needed), otherwise run DB_REC_ONCE.
+ * Run doorbell recovery in case of PF overflow (and flush DORQ if
+ * needed).
*
* @param p_hwfn
* @param p_ptt
}
}
-#define QED_PERIODIC_DB_REC_COUNT 100
+#define QED_PERIODIC_DB_REC_COUNT 10
#define QED_PERIODIC_DB_REC_INTERVAL_MS 100
#define QED_PERIODIC_DB_REC_INTERVAL \
msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS)
p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN;
} else {
DP_INFO(p_hwfn,
- "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n",
+ "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n",
vf->abs_vf_id,
req->vfdev_info.eth_fp_hsi_major,
req->vfdev_info.eth_fp_hsi_minor,
ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev);
if (IS_ERR(ptp->clock)) {
- rc = -EINVAL;
DP_ERR(edev, "PTP clock registration failed\n");
+ qede_ptp_disable(edev);
+ rc = -EINVAL;
goto err2;
}
return 0;
-err2:
- qede_ptp_disable(edev);
- ptp->clock = NULL;
err1:
kfree(ptp);
+err2:
edev->ptp = NULL;
return rc;
u16 board_type;
u16 supported_type;
- u16 link_speed;
+ u32 link_speed;
u16 link_duplex;
u16 link_autoneg;
u16 module_type;
#include <linux/pm_runtime.h>
#include <linux/firmware.h>
#include <linux/prefetch.h>
+#include <linux/pci-aspm.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
u32 ocp_base;
};
+typedef void (*rtl_generic_fct)(struct rtl8169_private *tp);
+
MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
module_param_named(debug, debug.msg_enable, int, 0);
static void rtl_hw_phy_config(struct net_device *dev)
{
+ static const rtl_generic_fct phy_configs[] = {
+ /* PCI devices. */
+ [RTL_GIGA_MAC_VER_01] = NULL,
+ [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
+ [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
+ [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
+ [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config,
+ [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config,
+ /* PCI-E devices. */
+ [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_10] = NULL,
+ [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config,
+ [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config,
+ [RTL_GIGA_MAC_VER_13] = NULL,
+ [RTL_GIGA_MAC_VER_14] = NULL,
+ [RTL_GIGA_MAC_VER_15] = NULL,
+ [RTL_GIGA_MAC_VER_16] = NULL,
+ [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config,
+ [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_22] = rtl8168c_4_hw_phy_config,
+ [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config,
+ [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_31] = NULL,
+ [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config,
+ [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config,
+ [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_41] = NULL,
+ [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
+ };
struct rtl8169_private *tp = netdev_priv(dev);
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01:
- break;
- case RTL_GIGA_MAC_VER_02:
- case RTL_GIGA_MAC_VER_03:
- rtl8169s_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_04:
- rtl8169sb_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_05:
- rtl8169scd_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_06:
- rtl8169sce_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_07:
- case RTL_GIGA_MAC_VER_08:
- case RTL_GIGA_MAC_VER_09:
- rtl8102e_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_11:
- rtl8168bb_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_12:
- rtl8168bef_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_17:
- rtl8168bef_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_18:
- rtl8168cp_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_19:
- rtl8168c_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_20:
- rtl8168c_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_21:
- rtl8168c_3_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_22:
- rtl8168c_4_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_23:
- case RTL_GIGA_MAC_VER_24:
- rtl8168cp_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_25:
- rtl8168d_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_26:
- rtl8168d_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_27:
- rtl8168d_3_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_28:
- rtl8168d_4_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_29:
- case RTL_GIGA_MAC_VER_30:
- rtl8105e_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_31:
- /* None. */
- break;
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
- rtl8168e_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_34:
- rtl8168e_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_35:
- rtl8168f_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_36:
- rtl8168f_2_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_37:
- rtl8402_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_38:
- rtl8411_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_39:
- rtl8106e_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_40:
- rtl8168g_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- rtl8168g_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_47:
- rtl8168h_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_48:
- rtl8168h_2_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_49:
- rtl8168ep_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
- rtl8168ep_2_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_41:
- default:
- break;
- }
+ if (phy_configs[tp->mac_version])
+ phy_configs[tp->mac_version](tp);
}
static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
rtl_hw_aspm_clkreq_enable(tp, true);
}
-static void rtl_hw_start_8168(struct rtl8169_private *tp)
-{
- RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
- /* Work around for RxFIFO overflow. */
- if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
- tp->irq_mask |= RxFIFOOver;
- tp->irq_mask &= ~RxOverflow;
- }
-
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_11:
- rtl_hw_start_8168bb(tp);
- break;
-
- case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_17:
- rtl_hw_start_8168bef(tp);
- break;
-
- case RTL_GIGA_MAC_VER_18:
- rtl_hw_start_8168cp_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_19:
- rtl_hw_start_8168c_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_20:
- rtl_hw_start_8168c_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_21:
- rtl_hw_start_8168c_3(tp);
- break;
-
- case RTL_GIGA_MAC_VER_22:
- rtl_hw_start_8168c_4(tp);
- break;
-
- case RTL_GIGA_MAC_VER_23:
- rtl_hw_start_8168cp_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_24:
- rtl_hw_start_8168cp_3(tp);
- break;
-
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- rtl_hw_start_8168d(tp);
- break;
-
- case RTL_GIGA_MAC_VER_28:
- rtl_hw_start_8168d_4(tp);
- break;
-
- case RTL_GIGA_MAC_VER_31:
- rtl_hw_start_8168dp(tp);
- break;
-
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
- rtl_hw_start_8168e_1(tp);
- break;
- case RTL_GIGA_MAC_VER_34:
- rtl_hw_start_8168e_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- rtl_hw_start_8168f_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_38:
- rtl_hw_start_8411(tp);
- break;
-
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- rtl_hw_start_8168g_1(tp);
- break;
- case RTL_GIGA_MAC_VER_42:
- rtl_hw_start_8168g_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_44:
- rtl_hw_start_8411_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- rtl_hw_start_8168h_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_49:
- rtl_hw_start_8168ep_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_50:
- rtl_hw_start_8168ep_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_51:
- rtl_hw_start_8168ep_3(tp);
- break;
-
- default:
- netif_err(tp, drv, tp->dev,
- "unknown chipset (mac_version = %d)\n",
- tp->mac_version);
- break;
- }
-}
-
static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8102e_1[] = {
rtl_hw_aspm_clkreq_enable(tp, true);
}
+static void rtl_hw_config(struct rtl8169_private *tp)
+{
+ static const rtl_generic_fct hw_configs[] = { /* indexed by tp->mac_version */
+ [RTL_GIGA_MAC_VER_07] = rtl_hw_start_8102e_1,
+ [RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3,
+ [RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2,
+ [RTL_GIGA_MAC_VER_10] = NULL,
+ [RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168bb,
+ [RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168bef,
+ [RTL_GIGA_MAC_VER_13] = NULL,
+ [RTL_GIGA_MAC_VER_14] = NULL,
+ [RTL_GIGA_MAC_VER_15] = NULL,
+ [RTL_GIGA_MAC_VER_16] = NULL,
+ [RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168bef,
+ [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
+ [RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1,
+ [RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2,
+ [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3,
+ [RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4,
+ [RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2,
+ [RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3,
+ [RTL_GIGA_MAC_VER_25] = rtl_hw_start_8168d,
+ [RTL_GIGA_MAC_VER_26] = rtl_hw_start_8168d,
+ [RTL_GIGA_MAC_VER_27] = rtl_hw_start_8168d,
+ [RTL_GIGA_MAC_VER_28] = rtl_hw_start_8168d_4,
+ [RTL_GIGA_MAC_VER_29] = rtl_hw_start_8105e_1,
+ [RTL_GIGA_MAC_VER_30] = rtl_hw_start_8105e_2,
+ [RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168dp,
+ [RTL_GIGA_MAC_VER_32] = rtl_hw_start_8168e_1,
+ [RTL_GIGA_MAC_VER_33] = rtl_hw_start_8168e_1,
+ [RTL_GIGA_MAC_VER_34] = rtl_hw_start_8168e_2,
+ [RTL_GIGA_MAC_VER_35] = rtl_hw_start_8168f_1,
+ [RTL_GIGA_MAC_VER_36] = rtl_hw_start_8168f_1,
+ [RTL_GIGA_MAC_VER_37] = rtl_hw_start_8402,
+ [RTL_GIGA_MAC_VER_38] = rtl_hw_start_8411,
+ [RTL_GIGA_MAC_VER_39] = rtl_hw_start_8106,
+ [RTL_GIGA_MAC_VER_40] = rtl_hw_start_8168g_1,
+ [RTL_GIGA_MAC_VER_41] = rtl_hw_start_8168g_1,
+ [RTL_GIGA_MAC_VER_42] = rtl_hw_start_8168g_2,
+ [RTL_GIGA_MAC_VER_43] = rtl_hw_start_8168g_2,
+ [RTL_GIGA_MAC_VER_44] = rtl_hw_start_8411_2,
+ [RTL_GIGA_MAC_VER_45] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_46] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_47] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_48] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_49] = rtl_hw_start_8168ep_1,
+ [RTL_GIGA_MAC_VER_50] = rtl_hw_start_8168ep_2,
+ [RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3,
+ };
+ /* Call the table entry for this chip, if any: slots that are NULL or
+ * have no initializer are skipped silently.
+ * NOTE(review): the index is not checked against the table size;
+ * presumably mac_version never exceeds RTL_GIGA_MAC_VER_51 - confirm.
+ */
+ if (hw_configs[tp->mac_version])
+ hw_configs[tp->mac_version](tp);
+}
+
+static void rtl_hw_start_8168(struct rtl8169_private *tp)
+{
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
+
+ /* Workaround for RxFIFO overflow: on RTL_GIGA_MAC_VER_11 the IRQ mask
+ * gains RxFIFOOver and loses RxOverflow.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
+ tp->irq_mask |= RxFIFOOver;
+ tp->irq_mask &= ~RxOverflow;
+ }
+ /* The rest of the setup is whatever rtl_hw_config() selects for this
+ * mac_version.
+ */
+ rtl_hw_config(tp);
+}
+
static void rtl_hw_start_8101(struct rtl8169_private *tp)
{
if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
tp->cp_cmd &= CPCMD_QUIRK_MASK;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_07:
- rtl_hw_start_8102e_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_08:
- rtl_hw_start_8102e_3(tp);
- break;
-
- case RTL_GIGA_MAC_VER_09:
- rtl_hw_start_8102e_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_29:
- rtl_hw_start_8105e_1(tp);
- break;
- case RTL_GIGA_MAC_VER_30:
- rtl_hw_start_8105e_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_37:
- rtl_hw_start_8402(tp);
- break;
-
- case RTL_GIGA_MAC_VER_39:
- rtl_hw_start_8106(tp);
- break;
- case RTL_GIGA_MAC_VER_43:
- rtl_hw_start_8168g_2(tp);
- break;
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- rtl_hw_start_8168h_1(tp);
- break;
- }
+ rtl_hw_config(tp);
}
static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
}
- if (status & (RTL_EVENT_NAPI | LinkChg)) {
- rtl_irq_disable(tp);
- napi_schedule_irqoff(&tp->napi);
- }
+ rtl_irq_disable(tp);
+ napi_schedule_irqoff(&tp->napi);
out:
rtl_ack_events(tp, status);
if (rc)
return rc;
+ /* Disable ASPM completely, as it causes random "device stops working"
+ * problems as well as full system hangs for some users of PCIe devices.
+ */
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
+
/* enable device (incl. PCI PM wakeup and hotplug setup) */
rc = pcim_enable_device(pdev);
if (rc < 0) {
}
}
+static const struct soc_device_attribute ravb_delay_mode_quirk_match[] = {
+ { .soc_id = "r8a774c0" },
+ { .soc_id = "r8a77990" },
+ { .soc_id = "r8a77995" },
+ { /* sentinel */ }
+};
+
/* Set tx and rx clock internal delay modes */
static void ravb_set_delay_mode(struct net_device *ndev)
{
set |= APSR_DM_RDM;
if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
- priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID)
- set |= APSR_DM_TDM;
+ priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+ if (!WARN(soc_device_match(ravb_delay_mode_quirk_match),
+ "phy-mode %s requires TX clock internal delay mode which is not supported by this hardware revision. Please update device tree",
+ phy_modes(priv->phy_interface)))
+ set |= APSR_DM_TDM;
+ }
ravb_modify(ndev, APSR, APSR_DM, set);
}
switch (event) {
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET) {
+ struct fib_entry_notifier_info *fen_info = ptr;
+
+ if (fen_info->fi->fib_nh_is_v6) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
+
memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
/* Take referece on fib_info to prevent it from being
* freed while work is queued. Release it afterwards.
}
static void *netsec_alloc_rx_data(struct netsec_priv *priv,
- dma_addr_t *dma_handle, u16 *desc_len)
+ dma_addr_t *dma_handle, u16 *desc_len,
+ bool napi)
{
size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
size_t payload_len = NETSEC_RX_BUF_SZ;
total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
- buf = napi_alloc_frag(total_len);
+ buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len);
if (!buf)
return NULL;
/* allocate a fresh buffer and map it to the hardware.
* This will eventually replace the old buffer in the hardware
*/
- buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+ buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len,
+ true);
if (unlikely(!buf_addr))
break;
void *buf;
u16 len;
- buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+ buf = netsec_alloc_rx_data(priv, &dma_handle, &len,
+ false);
if (!buf) {
netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
goto err_out;
#define XGMAC_RSF BIT(5)
#define XGMAC_RTC GENMASK(1, 0)
#define XGMAC_RTC_SHIFT 0
+#define XGMAC_MTL_RXQ_FLOW_CONTROL(x) (0x00001150 + (0x80 * (x)))
+#define XGMAC_RFD GENMASK(31, 17)
+#define XGMAC_RFD_SHIFT 17
+#define XGMAC_RFA GENMASK(15, 1)
+#define XGMAC_RFA_SHIFT 1
#define XGMAC_MTL_QINTEN(x) (0x00001170 + (0x80 * (x)))
#define XGMAC_RXOIE BIT(16)
#define XGMAC_MTL_QINT_STATUS(x) (0x00001174 + (0x80 * (x)))
value &= ~XGMAC_RQS;
value |= (rqs << XGMAC_RQS_SHIFT) & XGMAC_RQS;
+ if ((fifosz >= 4096) && (qmode != MTL_QUEUE_AVB)) {
+ u32 flow = readl(ioaddr + XGMAC_MTL_RXQ_FLOW_CONTROL(channel));
+ unsigned int rfd, rfa;
+
+ value |= XGMAC_EHFC;
+
+ /* Set Threshold for Activating Flow Control to min 2 frames,
+ * i.e. 1500 * 2 = 3000 bytes.
+ *
+ * Set Threshold for Deactivating Flow Control to min 1 frame,
+ * i.e. 1500 bytes.
+ */
+ switch (fifosz) {
+ case 4096:
+ /* This violates the above formula because of FIFO size
+ * limit therefore overflow may occur in spite of this.
+ */
+ rfd = 0x03; /* Full-2.5K */
+ rfa = 0x01; /* Full-1.5K */
+ break;
+
+ case 8192:
+ rfd = 0x06; /* Full-4K */
+ rfa = 0x0a; /* Full-6K */
+ break;
+
+ case 16384:
+ rfd = 0x06; /* Full-4K */
+ rfa = 0x12; /* Full-10K */
+ break;
+
+ default:
+ rfd = 0x06; /* Full-4K */
+ rfa = 0x1e; /* Full-16K */
+ break;
+ }
+
+ flow &= ~XGMAC_RFD;
+ flow |= rfd << XGMAC_RFD_SHIFT;
+
+ flow &= ~XGMAC_RFA;
+ flow |= rfa << XGMAC_RFA_SHIFT;
+
+ writel(flow, ioaddr + XGMAC_MTL_RXQ_FLOW_CONTROL(channel));
+ }
+
writel(value, ioaddr + XGMAC_MTL_RXQ_OPMODE(channel));
/* Enable MTL RX overflow */
p->des0 |= cpu_to_le32(RDES0_OWN);
bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
- p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK);
+ p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK);
if (mode == STMMAC_CHAIN_MODE)
ndesc_rx_set_on_chain(p, end);
#define STMMAC_TX_THRESH (DMA_TX_SIZE / 4)
#define STMMAC_RX_THRESH (DMA_RX_SIZE / 4)
-static int flow_ctrl = FLOW_OFF;
+static int flow_ctrl = FLOW_AUTO;
module_param(flow_ctrl, int, 0644);
MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
u32 chan;
int ret;
- stmmac_check_ether_addr(priv);
-
if (priv->hw->pcs != STMMAC_PCS_RGMII &&
priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI) {
if (ret)
goto error_hw_init;
+ stmmac_check_ether_addr(priv);
+
/* Configure real RX and TX queues */
netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
},
.driver_data = (void *)&galileo_stmmac_dmi_data,
},
+ /*
+ * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+ * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which
+ * has only one pci network device while other asset tags are
+ * for IOT2040 which has two.
+ */
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
- DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
- "6ES7647-0AA00-1YA2"),
},
.driver_data = (void *)&iot2040_stmmac_dmi_data,
},
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
+#include <linux/ethtool.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/of_address.h>
return (bool)*p;
}
+/**
+ * xemaclite_ethtools_get_drvinfo - Get various Axi Emac Lite driver info
+ * @ndev: Pointer to net_device structure
+ * @ed: Pointer to ethtool_drvinfo structure
+ *
+ * This implements ethtool command for getting the driver information.
+ * Issue "ethtool -i ethX" under linux prompt to execute this function.
+ *
+ * Only the driver name (DRIVER_NAME) is copied into @ed->driver; @ndev is
+ * not used and no other field of @ed is written here.
+ */
+static void xemaclite_ethtools_get_drvinfo(struct net_device *ndev,
+ struct ethtool_drvinfo *ed)
+{
+ strlcpy(ed->driver, DRIVER_NAME, sizeof(ed->driver));
+}
+
+static const struct ethtool_ops xemaclite_ethtool_ops = {
+ .get_drvinfo = xemaclite_ethtools_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = phy_ethtool_get_link_ksettings,
+ .set_link_ksettings = phy_ethtool_set_link_ksettings,
+};
+
static const struct net_device_ops xemaclite_netdev_ops;
/**
dev_info(dev, "MAC address is now %pM\n", ndev->dev_addr);
ndev->netdev_ops = &xemaclite_netdev_ops;
+ ndev->ethtool_ops = &xemaclite_ethtool_ops;
ndev->flags &= ~IFF_MULTICAST;
ndev->watchdog_timeo = TX_TIMEOUT;
}
#endif
+/* Ioctl MII Interface
+ *
+ * Returns -EINVAL when the device has no phydev or is not running.
+ * SIOCGMIIPHY, SIOCGMIIREG and SIOCSMIIREG are passed to phy_mii_ioctl()
+ * and its result is returned; every other command yields -EOPNOTSUPP.
+ */
+static int xemaclite_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ if (!dev->phydev || !netif_running(dev))
+ return -EINVAL;
+
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCSMIIREG:
+ return phy_mii_ioctl(dev->phydev, rq, cmd);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops xemaclite_netdev_ops = {
.ndo_open = xemaclite_open,
.ndo_stop = xemaclite_close,
.ndo_start_xmit = xemaclite_send,
.ndo_set_mac_address = xemaclite_set_mac_address,
.ndo_tx_timeout = xemaclite_tx_timeout,
+ .ndo_do_ioctl = xemaclite_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = xemaclite_poll_controller,
#endif
if (rrpriv->tx_skbuff[cons]){
len = min_t(int, 0x80, rrpriv->tx_skbuff[cons]->len);
printk("skbuff for cons %i is valid - dumping data (0x%x bytes - skbuff len 0x%x)\n", cons, len, rrpriv->tx_skbuff[cons]->len);
- printk("mode 0x%x, size 0x%x,\n phys %08Lx, skbuff-addr %08lx, truesize 0x%x\n",
+ printk("mode 0x%x, size 0x%x,\n phys %08Lx, skbuff-addr %p, truesize 0x%x\n",
rrpriv->tx_ring[cons].mode,
rrpriv->tx_ring[cons].size,
(unsigned long long) rrpriv->tx_ring[cons].addr.addrlo,
- (unsigned long)rrpriv->tx_skbuff[cons]->data,
+ rrpriv->tx_skbuff[cons]->data,
(unsigned int)rrpriv->tx_skbuff[cons]->truesize);
for (i = 0; i < len; i++){
if (!(i & 7))
return 1;
}
-static int loopback_get_ts_info(struct net_device *netdev,
- struct ethtool_ts_info *ts_info)
-{
- ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
-
- ts_info->phc_index = -1;
-
- return 0;
-};
-
static const struct ethtool_ops loopback_ethtool_ops = {
.get_link = always_on,
- .get_ts_info = loopback_get_ts_info,
+ .get_ts_info = ethtool_op_get_ts_info,
};
static int loopback_dev_init(struct net_device *dev)
obj-$(CONFIG_NETDEVSIM) += netdevsim.o
netdevsim-objs := \
- netdev.o \
+ netdev.o dev.o fib.o bus.o
ifeq ($(CONFIG_BPF_SYSCALL),y)
netdevsim-objs += \
bpf.o
endif
-ifneq ($(CONFIG_NET_DEVLINK),)
-netdevsim-objs += devlink.o fib.o
-endif
-
ifneq ($(CONFIG_XFRM_OFFLOAD),)
netdevsim-objs += ipsec.o
endif
bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__)
struct nsim_bpf_bound_prog {
- struct netdevsim *ns;
+ struct nsim_dev *nsim_dev;
struct bpf_prog *prog;
struct dentry *ddir;
const char *state;
struct nsim_bpf_bound_prog *state;
state = env->prog->aux->offload->dev_priv;
- if (state->ns->bpf_bind_verifier_delay && !insn_idx)
- msleep(state->ns->bpf_bind_verifier_delay);
+ if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx)
+ msleep(state->nsim_dev->bpf_bind_verifier_delay);
if (insn_idx == env->prog->len - 1)
pr_vlog(env, "Hello from netdevsim!\n");
return 0;
}
-static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
+static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev,
+ struct bpf_prog *prog)
{
struct nsim_bpf_bound_prog *state;
char name[16];
if (!state)
return -ENOMEM;
- state->ns = ns;
+ state->nsim_dev = nsim_dev;
state->prog = prog;
state->state = "verify";
/* Program id is not populated yet when we create the state. */
- sprintf(name, "%u", ns->sdev->prog_id_gen++);
- state->ddir = debugfs_create_dir(name, ns->sdev->ddir_bpf_bound_progs);
+ sprintf(name, "%u", nsim_dev->prog_id_gen++);
+ state->ddir = debugfs_create_dir(name, nsim_dev->ddir_bpf_bound_progs);
if (IS_ERR_OR_NULL(state->ddir)) {
kfree(state);
return -ENOMEM;
&state->state, &nsim_bpf_string_fops);
debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
- list_add_tail(&state->l, &ns->sdev->bpf_bound_progs);
+ list_add_tail(&state->l, &nsim_dev->bpf_bound_progs);
prog->aux->offload->dev_priv = state;
static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
{
- struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev);
+ struct nsim_dev *nsim_dev =
+ bpf_offload_dev_priv(prog->aux->offload->offdev);
- if (!ns->bpf_bind_accept)
+ if (!nsim_dev->bpf_bind_accept)
return -EOPNOTSUPP;
- return nsim_bpf_create_prog(ns, prog);
+ return nsim_bpf_create_prog(nsim_dev, prog);
}
static int nsim_bpf_translate(struct bpf_prog *prog)
}
offmap->dev_ops = &nsim_bpf_map_ops;
- list_add_tail(&nmap->l, &ns->sdev->bpf_bound_maps);
+ list_add_tail(&nmap->l, &ns->nsim_dev->bpf_bound_maps);
return 0;
}
}
-int nsim_bpf_init(struct netdevsim *ns)
+int nsim_bpf_dev_init(struct nsim_dev *nsim_dev)
{
int err;
- if (ns->sdev->refcnt == 1) {
- INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs);
- INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps);
+ INIT_LIST_HEAD(&nsim_dev->bpf_bound_progs);
+ INIT_LIST_HEAD(&nsim_dev->bpf_bound_maps);
- ns->sdev->ddir_bpf_bound_progs =
- debugfs_create_dir("bpf_bound_progs", ns->sdev->ddir);
- if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
- return -ENOMEM;
+ nsim_dev->ddir_bpf_bound_progs = debugfs_create_dir("bpf_bound_progs",
+ nsim_dev->ddir);
+ if (IS_ERR_OR_NULL(nsim_dev->ddir_bpf_bound_progs))
+ return -ENOMEM;
- ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops,
- ns);
- err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
- if (err)
- return err;
- }
+ nsim_dev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, nsim_dev);
+ err = PTR_ERR_OR_ZERO(nsim_dev->bpf_dev);
+ if (err)
+ return err;
+
+ nsim_dev->bpf_bind_accept = true;
+ debugfs_create_bool("bpf_bind_accept", 0600, nsim_dev->ddir,
+ &nsim_dev->bpf_bind_accept);
+ debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir,
+ &nsim_dev->bpf_bind_verifier_delay);
+ return 0;
+}
+
+/* Per-device BPF teardown: warn if any bound program or map is still on
+ * the device lists, then destroy the BPF offload device.
+ */
+void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev)
+{
+ WARN_ON(!list_empty(&nsim_dev->bpf_bound_progs));
+ WARN_ON(!list_empty(&nsim_dev->bpf_bound_maps));
+ bpf_offload_dev_destroy(nsim_dev->bpf_dev);
+}
+
+int nsim_bpf_init(struct netdevsim *ns)
+{
+ struct dentry *ddir = ns->nsim_dev_port->ddir;
+ int err;
- err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
+ err = bpf_offload_dev_netdev_register(ns->nsim_dev->bpf_dev,
+ ns->netdev);
if (err)
- goto err_destroy_bdev;
+ return err;
- debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
+ debugfs_create_u32("bpf_offloaded_id", 0400, ddir,
&ns->bpf_offloaded_id);
- ns->bpf_bind_accept = true;
- debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir,
- &ns->bpf_bind_accept);
- debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir,
- &ns->bpf_bind_verifier_delay);
-
ns->bpf_tc_accept = true;
- debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
+ debugfs_create_bool("bpf_tc_accept", 0600, ddir,
&ns->bpf_tc_accept);
- debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ns->ddir,
+ debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ddir,
&ns->bpf_tc_non_bound_accept);
ns->bpf_xdpdrv_accept = true;
- debugfs_create_bool("bpf_xdpdrv_accept", 0600, ns->ddir,
+ debugfs_create_bool("bpf_xdpdrv_accept", 0600, ddir,
&ns->bpf_xdpdrv_accept);
ns->bpf_xdpoffload_accept = true;
- debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
+ debugfs_create_bool("bpf_xdpoffload_accept", 0600, ddir,
&ns->bpf_xdpoffload_accept);
ns->bpf_map_accept = true;
- debugfs_create_bool("bpf_map_accept", 0600, ns->ddir,
+ debugfs_create_bool("bpf_map_accept", 0600, ddir,
&ns->bpf_map_accept);
return 0;
-
-err_destroy_bdev:
- if (ns->sdev->refcnt == 1)
- bpf_offload_dev_destroy(ns->sdev->bpf_dev);
- return err;
}
void nsim_bpf_uninit(struct netdevsim *ns)
WARN_ON(ns->xdp.prog);
WARN_ON(ns->xdp_hw.prog);
WARN_ON(ns->bpf_offloaded);
- bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev);
-
- if (ns->sdev->refcnt == 1) {
- WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
- WARN_ON(!list_empty(&ns->sdev->bpf_bound_maps));
- bpf_offload_dev_destroy(ns->sdev->bpf_dev);
- }
+ bpf_offload_dev_netdev_unregister(ns->nsim_dev->bpf_dev, ns->netdev);
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2019 Mellanox Technologies. All rights reserved
+ */
+
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+
+#include "netdevsim.h"
+
+/* Allocator for bus device ids, see nsim_bus_dev_new() */
+static DEFINE_IDA(nsim_bus_dev_ids);
+/* Devices created through the "new_device" bus attribute */
+static LIST_HEAD(nsim_bus_dev_list);
+/* Held around every add to / removal from nsim_bus_dev_list */
+static DEFINE_MUTEX(nsim_bus_dev_list_lock);
+
+/* Map a struct device embedded in a struct nsim_bus_dev to its container. */
+static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
+{
+ return container_of(dev, struct nsim_bus_dev, dev);
+}
+
+/* Allocate a zeroed array of @num_vfs VF config entries and record the
+ * count. Returns 0 on success, -ENOMEM if the allocation fails (num_vfs is
+ * left untouched in that case).
+ */
+static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
+ unsigned int num_vfs)
+{
+ nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
+ sizeof(struct nsim_vf_config),
+ GFP_KERNEL);
+ if (!nsim_bus_dev->vfconfigs)
+ return -ENOMEM;
+ nsim_bus_dev->num_vfs = num_vfs;
+
+ return 0;
+}
+
+/* Free the VF config array and reset the VF count to zero. */
+static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
+{
+ kfree(nsim_bus_dev->vfconfigs);
+ nsim_bus_dev->vfconfigs = NULL;
+ nsim_bus_dev->num_vfs = 0;
+}
+
+/* sysfs "sriov_numvfs" store: change the number of VF config entries.
+ *
+ * Writing the current value changes nothing. Switching directly between
+ * two different non-zero counts is refused with -EBUSY. The update is
+ * done under the RTNL lock. Returns @count on success or a negative errno.
+ */
+static ssize_t
+nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
+	unsigned int requested;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &requested);
+	if (ret)
+		return ret;
+
+	rtnl_lock();
+	if (nsim_bus_dev->num_vfs == requested) {
+		/* already at the requested count */
+		ret = count;
+	} else if (nsim_bus_dev->num_vfs && requested) {
+		ret = -EBUSY;
+	} else if (requested) {
+		ret = nsim_bus_dev_vfs_enable(nsim_bus_dev, requested);
+		if (!ret)
+			ret = count;
+	} else {
+		nsim_bus_dev_vfs_disable(nsim_bus_dev);
+		ret = count;
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+
+/* sysfs "sriov_numvfs" show: print the current VF count followed by a
+ * newline.
+ */
+static ssize_t
+nsim_bus_dev_numvfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
+
+ return sprintf(buf, "%u\n", nsim_bus_dev->num_vfs);
+}
+
+/* "sriov_numvfs" device attribute, mode 0664 */
+static struct device_attribute nsim_bus_dev_numvfs_attr =
+ __ATTR(sriov_numvfs, 0664, nsim_bus_dev_numvfs_show,
+ nsim_bus_dev_numvfs_store);
+
+/* sysfs "new_port" store: parse a port index and add that port to the
+ * device. Returns @count on success or a negative errno.
+ */
+static ssize_t
+new_port_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	unsigned int port_index;
+	int err;
+
+	err = kstrtouint(buf, 0, &port_index);
+	if (err)
+		return err;
+
+	err = nsim_dev_port_add(to_nsim_bus_dev(dev), port_index);
+	if (err)
+		return err;
+
+	return count;
+}
+
+static struct device_attribute nsim_bus_dev_new_port_attr = __ATTR_WO(new_port);
+
+/* sysfs "del_port" store: parse a port index and remove that port from
+ * the device. Returns @count on success or a negative errno.
+ */
+static ssize_t
+del_port_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	unsigned int port_index;
+	int err;
+
+	err = kstrtouint(buf, 0, &port_index);
+	if (err)
+		return err;
+
+	err = nsim_dev_port_del(to_nsim_bus_dev(dev), port_index);
+	if (err)
+		return err;
+
+	return count;
+}
+
+static struct device_attribute nsim_bus_dev_del_port_attr = __ATTR_WO(del_port);
+
+/* Per-device sysfs attributes: VF count plus port add/remove triggers */
+static struct attribute *nsim_bus_dev_attrs[] = {
+	&nsim_bus_dev_numvfs_attr.attr,
+	&nsim_bus_dev_new_port_attr.attr,
+	&nsim_bus_dev_del_port_attr.attr,
+	NULL,
+};
+/* Generates nsim_bus_dev_group and the NULL-terminated nsim_bus_dev_groups */
+ATTRIBUTE_GROUPS(nsim_bus_dev);
+
+/* Device release callback: drops the VF configuration. The structure
+ * itself is not freed here.
+ */
+static void nsim_bus_dev_release(struct device *dev)
+{
+	nsim_bus_dev_vfs_disable(to_nsim_bus_dev(dev));
+}
+
+/* Device type shared by every device on the bus */
+static struct device_type nsim_bus_dev_type = {
+	.groups = nsim_bus_dev_groups,
+	.release = nsim_bus_dev_release,
+};
+
+static struct nsim_bus_dev *
+nsim_bus_dev_new(unsigned int id, unsigned int port_count);
+
+/* Bus attribute "new_device": create a device from "id [port_count]".
+ *
+ * port_count defaults to 1 when only the id is given. Ids above INT_MAX
+ * and unparsable input are rejected with -EINVAL. On success the new
+ * device is appended to nsim_bus_dev_list and @count is returned;
+ * otherwise the error from nsim_bus_dev_new() is returned.
+ */
+static ssize_t
+new_device_store(struct bus_type *bus, const char *buf, size_t count)
+{
+	struct nsim_bus_dev *nsim_bus_dev;
+	unsigned int port_count;
+	unsigned int id;
+	int err;
+
+	/* err holds the number of fields sscanf() converted */
+	err = sscanf(buf, "%u %u", &id, &port_count);
+	switch (err) {
+	case 1:
+		port_count = 1;
+		/* fall through */
+	case 2:
+		if (id > INT_MAX) {
+			pr_err("Value of \"id\" is too big.\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		pr_err("Format for adding new device is \"id port_count\" (uint uint).\n");
+		return -EINVAL;
+	}
+	nsim_bus_dev = nsim_bus_dev_new(id, port_count);
+	if (IS_ERR(nsim_bus_dev))
+		return PTR_ERR(nsim_bus_dev);
+
+	mutex_lock(&nsim_bus_dev_list_lock);
+	list_add_tail(&nsim_bus_dev->list, &nsim_bus_dev_list);
+	mutex_unlock(&nsim_bus_dev_list_lock);
+
+	return count;
+}
+static BUS_ATTR_WO(new_device);
+
+static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev);
+
+/* Bus attribute "del_device": remove the device whose id is written.
+ *
+ * Returns @count when a matching device was found and deleted, -ENOENT
+ * when no device on nsim_bus_dev_list has that id, and -EINVAL for
+ * unparsable input or an id above INT_MAX.
+ */
+static ssize_t
+del_device_store(struct bus_type *bus, const char *buf, size_t count)
+{
+	struct nsim_bus_dev *cur, *next;
+	unsigned int id;
+	int err = -ENOENT;
+
+	if (sscanf(buf, "%u", &id) != 1) {
+		pr_err("Format for deleting device is \"id\" (uint).\n");
+		return -EINVAL;
+	}
+	if (id > INT_MAX) {
+		pr_err("Value of \"id\" is too big.\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&nsim_bus_dev_list_lock);
+	list_for_each_entry_safe(cur, next, &nsim_bus_dev_list, list) {
+		if (cur->dev.id == id) {
+			list_del(&cur->list);
+			nsim_bus_dev_del(cur);
+			err = 0;
+			break;
+		}
+	}
+	mutex_unlock(&nsim_bus_dev_list_lock);
+
+	if (err)
+		return err;
+	return count;
+}
+static BUS_ATTR_WO(del_device);
+
+static struct attribute *nsim_bus_attrs[] = {
+ &bus_attr_new_device.attr,
+ &bus_attr_del_device.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(nsim_bus);
+
+/* Bus probe callback: hand the bus device to nsim_dev_probe(). */
+static int nsim_bus_probe(struct device *dev)
+{
+ struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
+
+ return nsim_dev_probe(nsim_bus_dev);
+}
+
+/* Bus remove callback: hand the bus device to nsim_dev_remove(); always
+ * reports success.
+ */
+static int nsim_bus_remove(struct device *dev)
+{
+ struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
+
+ nsim_dev_remove(nsim_bus_dev);
+ return 0;
+}
+
+/* Bus num_vf callback: report the device's current VF count. */
+int nsim_num_vf(struct device *dev)
+{
+ struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
+
+ return nsim_bus_dev->num_vfs;
+}
+
+/* The bus itself: exposes the new_device/del_device attributes and routes
+ * probe/remove/num_vf to the handlers in this file.
+ */
+static struct bus_type nsim_bus = {
+ .name = DRV_NAME,
+ .dev_name = DRV_NAME,
+ .bus_groups = nsim_bus_groups,
+ .probe = nsim_bus_probe,
+ .remove = nsim_bus_remove,
+ .num_vf = nsim_num_vf,
+};
+
+/* Allocate and register a bus device with exactly the requested @id.
+ *
+ * @id:		device id to reserve; allocation fails if it is already taken
+ * @port_count:	number of ports recorded in the device for probe to create
+ *
+ * Returns the new device or an ERR_PTR() on failure.
+ */
+static struct nsim_bus_dev *
+nsim_bus_dev_new(unsigned int id, unsigned int port_count)
+{
+	struct nsim_bus_dev *nsim_bus_dev;
+	int err;
+
+	nsim_bus_dev = kzalloc(sizeof(*nsim_bus_dev), GFP_KERNEL);
+	if (!nsim_bus_dev)
+		return ERR_PTR(-ENOMEM);
+
+	/* min == max: only the requested id is acceptable */
+	err = ida_alloc_range(&nsim_bus_dev_ids, id, id, GFP_KERNEL);
+	if (err < 0)
+		goto err_nsim_bus_dev_free;
+	nsim_bus_dev->dev.id = err;
+	nsim_bus_dev->dev.bus = &nsim_bus;
+	nsim_bus_dev->dev.type = &nsim_bus_dev_type;
+	nsim_bus_dev->port_count = port_count;
+
+	err = device_register(&nsim_bus_dev->dev);
+	if (err)
+		goto err_nsim_bus_dev_put;
+	return nsim_bus_dev;
+
+err_nsim_bus_dev_put:
+	/* After device_register() the reference it initialized has to be
+	 * dropped with put_device(), even on failure, so that the driver
+	 * core releases what it allocated for the device. The release
+	 * callback of this device type does not free the structure, so
+	 * that is still done below, as in nsim_bus_dev_del().
+	 */
+	put_device(&nsim_bus_dev->dev);
+	ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
+err_nsim_bus_dev_free:
+	kfree(nsim_bus_dev);
+	return ERR_PTR(err);
+}
+
+/* Unregister a bus device, give its id back to the allocator and free it.
+ *
+ * NOTE(review): the structure is freed here rather than in the release
+ * callback; this presumably relies on nobody else holding a reference to
+ * the device once device_unregister() returns - confirm.
+ */
+static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
+{
+ device_unregister(&nsim_bus_dev->dev);
+ ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
+ kfree(nsim_bus_dev);
+}
+
+/* The single driver registered on nsim_bus; it has no callbacks of its
+ * own, probe/remove are supplied by the bus.
+ */
+static struct device_driver nsim_driver = {
+ .name = DRV_NAME,
+ .bus = &nsim_bus,
+ .owner = THIS_MODULE,
+};
+
+/* Register the bus and then its driver. If the driver cannot be
+ * registered the bus is unregistered again. Returns 0 or a negative errno.
+ */
+int nsim_bus_init(void)
+{
+	int err;
+
+	err = bus_register(&nsim_bus);
+	if (err)
+		return err;
+
+	err = driver_register(&nsim_driver);
+	if (err)
+		bus_unregister(&nsim_bus);
+
+	return err;
+}
+
+/* Delete every device still on nsim_bus_dev_list, then unregister the
+ * driver and the bus (reverse order of nsim_bus_init()).
+ */
+void nsim_bus_exit(void)
+{
+ struct nsim_bus_dev *nsim_bus_dev, *tmp;
+
+ mutex_lock(&nsim_bus_dev_list_lock);
+ list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) {
+ list_del(&nsim_bus_dev->list);
+ nsim_bus_dev_del(nsim_bus_dev);
+ }
+ mutex_unlock(&nsim_bus_dev_list_lock);
+ driver_unregister(&nsim_driver);
+ bus_unregister(&nsim_bus);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com>
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ *
+ * This software is licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <net/devlink.h>
+
+#include "netdevsim.h"
+
+static struct dentry *nsim_dev_ddir;
+
+/* Create the per-device debugfs directory "<DRV_NAME><id>" below the
+ * driver root, and a "ports" directory inside it.
+ *
+ * Returns 0 on success or a negative errno. Nothing is left behind in
+ * debugfs on failure.
+ */
+static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
+{
+	/* DRV_NAME followed by at most 10 decimal digits of the u32 id;
+	 * sizeof() already accounts for the terminating NUL.
+	 */
+	char dev_ddir_name[sizeof(DRV_NAME) + 10];
+	int err;
+
+	snprintf(dev_ddir_name, sizeof(dev_ddir_name), DRV_NAME "%u",
+		 nsim_dev->nsim_bus_dev->dev.id);
+	nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir);
+	if (IS_ERR_OR_NULL(nsim_dev->ddir))
+		return PTR_ERR_OR_ZERO(nsim_dev->ddir) ?: -EINVAL;
+
+	nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir);
+	if (IS_ERR_OR_NULL(nsim_dev->ports_ddir)) {
+		err = PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL;
+		/* nsim_dev_create() does not run the debugfs exit path when
+		 * this function fails, so undo the first directory here.
+		 */
+		debugfs_remove_recursive(nsim_dev->ddir);
+		return err;
+	}
+	return 0;
+}
+
+/* Remove the "ports" directory and then the whole per-device debugfs
+ * directory.
+ */
+static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
+{
+ debugfs_remove_recursive(nsim_dev->ports_ddir);
+ debugfs_remove_recursive(nsim_dev->ddir);
+}
+
+/* Create the debugfs directory for one port, named after its index, under
+ * the device's "ports" directory, and add a "dev" symlink inside it.
+ * Returns 0 on success or -ENOMEM if the directory cannot be created; the
+ * result of creating the symlink is not checked.
+ */
+static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
+ struct nsim_dev_port *nsim_dev_port)
+{
+ char port_ddir_name[16];
+ char dev_link_name[32];
+
+ sprintf(port_ddir_name, "%u", nsim_dev_port->port_index);
+ nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name,
+ nsim_dev->ports_ddir);
+ if (IS_ERR_OR_NULL(nsim_dev_port->ddir))
+ return -ENOMEM;
+
+ /* relative link target built from DRV_NAME and the bus device id */
+ sprintf(dev_link_name, "../../../" DRV_NAME "%u",
+ nsim_dev->nsim_bus_dev->dev.id);
+ debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
+
+ return 0;
+}
+
+/* Remove the port's debugfs directory and everything below it. */
+static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port)
+{
+ debugfs_remove_recursive(nsim_dev_port->ddir);
+}
+
+/* devlink occupancy callback for the IPv4 FIB resource; @priv is the
+ * struct nsim_dev passed at registration. Queries the non-max value.
+ */
+static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
+{
+ struct nsim_dev *nsim_dev = priv;
+
+ return nsim_fib_get_val(nsim_dev->fib_data,
+ NSIM_RESOURCE_IPV4_FIB, false);
+}
+
+/* devlink occupancy callback for the IPv4 FIB rules resource; @priv is the
+ * struct nsim_dev passed at registration. Queries the non-max value.
+ */
+static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv)
+{
+ struct nsim_dev *nsim_dev = priv;
+
+ return nsim_fib_get_val(nsim_dev->fib_data,
+ NSIM_RESOURCE_IPV4_FIB_RULES, false);
+}
+
+/* devlink occupancy callback for the IPv6 FIB resource; @priv is the
+ * struct nsim_dev passed at registration. Queries the non-max value.
+ */
+static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv)
+{
+ struct nsim_dev *nsim_dev = priv;
+
+ return nsim_fib_get_val(nsim_dev->fib_data,
+ NSIM_RESOURCE_IPV6_FIB, false);
+}
+
+/* devlink occupancy callback for the IPv6 FIB rules resource; @priv is the
+ * struct nsim_dev passed at registration. Queries the non-max value.
+ */
+static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv)
+{
+ struct nsim_dev *nsim_dev = priv;
+
+ return nsim_fib_get_val(nsim_dev->fib_data,
+ NSIM_RESOURCE_IPV6_FIB_RULES, false);
+}
+
+/* Register the devlink resource tree of a device:
+ *
+ *   IPv4 -> fib, fib-rules
+ *   IPv6 -> fib, fib-rules
+ *
+ * The child resources are sized from the current maximum stored in the
+ * device's FIB data, and each of them gets an occupancy callback with the
+ * nsim_dev as private data.
+ *
+ * Returns 0 on success. On failure every resource registered so far is
+ * unregistered again and the negative errno is returned.
+ */
+static int nsim_dev_resources_register(struct devlink *devlink)
+{
+	struct nsim_dev *nsim_dev = devlink_priv(devlink);
+	struct devlink_resource_size_params params = {
+		.size_max = (u64)-1,
+		.size_granularity = 1,
+		.unit = DEVLINK_RESOURCE_UNIT_ENTRY
+	};
+	int err;
+	u64 n;
+
+	/* Resources for IPv4 */
+	err = devlink_resource_register(devlink, "IPv4", (u64)-1,
+					NSIM_RESOURCE_IPV4,
+					DEVLINK_RESOURCE_ID_PARENT_TOP,
+					&params);
+	if (err) {
+		pr_err("Failed to register IPv4 top resource\n");
+		goto err_unregister;
+	}
+
+	n = nsim_fib_get_val(nsim_dev->fib_data,
+			     NSIM_RESOURCE_IPV4_FIB, true);
+	err = devlink_resource_register(devlink, "fib", n,
+					NSIM_RESOURCE_IPV4_FIB,
+					NSIM_RESOURCE_IPV4, &params);
+	if (err) {
+		pr_err("Failed to register IPv4 FIB resource\n");
+		goto err_unregister;
+	}
+
+	n = nsim_fib_get_val(nsim_dev->fib_data,
+			     NSIM_RESOURCE_IPV4_FIB_RULES, true);
+	err = devlink_resource_register(devlink, "fib-rules", n,
+					NSIM_RESOURCE_IPV4_FIB_RULES,
+					NSIM_RESOURCE_IPV4, &params);
+	if (err) {
+		pr_err("Failed to register IPv4 FIB rules resource\n");
+		goto err_unregister;
+	}
+
+	/* Resources for IPv6 */
+	err = devlink_resource_register(devlink, "IPv6", (u64)-1,
+					NSIM_RESOURCE_IPV6,
+					DEVLINK_RESOURCE_ID_PARENT_TOP,
+					&params);
+	if (err) {
+		pr_err("Failed to register IPv6 top resource\n");
+		goto err_unregister;
+	}
+
+	n = nsim_fib_get_val(nsim_dev->fib_data,
+			     NSIM_RESOURCE_IPV6_FIB, true);
+	err = devlink_resource_register(devlink, "fib", n,
+					NSIM_RESOURCE_IPV6_FIB,
+					NSIM_RESOURCE_IPV6, &params);
+	if (err) {
+		pr_err("Failed to register IPv6 FIB resource\n");
+		goto err_unregister;
+	}
+
+	n = nsim_fib_get_val(nsim_dev->fib_data,
+			     NSIM_RESOURCE_IPV6_FIB_RULES, true);
+	err = devlink_resource_register(devlink, "fib-rules", n,
+					NSIM_RESOURCE_IPV6_FIB_RULES,
+					NSIM_RESOURCE_IPV6, &params);
+	if (err) {
+		pr_err("Failed to register IPv6 FIB rules resource\n");
+		goto err_unregister;
+	}
+
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV4_FIB,
+					  nsim_dev_ipv4_fib_resource_occ_get,
+					  nsim_dev);
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV4_FIB_RULES,
+					  nsim_dev_ipv4_fib_rules_res_occ_get,
+					  nsim_dev);
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV6_FIB,
+					  nsim_dev_ipv6_fib_resource_occ_get,
+					  nsim_dev);
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV6_FIB_RULES,
+					  nsim_dev_ipv6_fib_rules_res_occ_get,
+					  nsim_dev);
+	return 0;
+
+err_unregister:
+	/* the caller skips its own resource unregistration when this
+	 * function fails, so drop whatever was registered before the error
+	 */
+	devlink_resources_unregister(devlink, NULL);
+	return err;
+}
+
+/* devlink reload callback: push the configured size of each FIB resource
+ * into the device's FIB data as its new maximum.
+ *
+ * A resource whose size cannot be read is skipped. The first failure of
+ * nsim_fib_set_max() aborts the reload and is returned.
+ */
+static int nsim_dev_reload(struct devlink *devlink,
+			   struct netlink_ext_ack *extack)
+{
+	struct nsim_dev *nsim_dev = devlink_priv(devlink);
+	enum nsim_resource_id res_ids[] = {
+		NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
+		NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
+	};
+	u64 size;
+	int idx;
+	int err;
+
+	for (idx = 0; idx < ARRAY_SIZE(res_ids); idx++) {
+		if (devlink_resource_size_get(devlink, res_ids[idx], &size))
+			continue;
+
+		err = nsim_fib_set_max(nsim_dev->fib_data, res_ids[idx],
+				       size, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* devlink operations of a device; only reload is implemented */
+static const struct devlink_ops nsim_dev_devlink_ops = {
+ .reload = nsim_dev_reload,
+};
+
+/* Allocate and set up a device instance for @nsim_bus_dev.
+ *
+ * The nsim_dev lives in the private area of a freshly allocated devlink
+ * instance. Setup order: random switch id, port list and its lock, FIB
+ * data, devlink resources, devlink registration, debugfs, per-device BPF
+ * state. A failure unwinds the completed steps in reverse order.
+ *
+ * @nsim_bus_dev: bus device the instance belongs to
+ * @port_count:   currently unused here; ports are added by the caller
+ *
+ * Returns the new nsim_dev or an ERR_PTR().
+ */
+static struct nsim_dev *
+nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
+{
+	struct nsim_dev *nsim_dev;
+	struct devlink *devlink;
+	int err;
+
+	devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev));
+	if (!devlink)
+		return ERR_PTR(-ENOMEM);
+	nsim_dev = devlink_priv(devlink);
+	nsim_dev->nsim_bus_dev = nsim_bus_dev;
+	nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id);
+	get_random_bytes(nsim_dev->switch_id.id, nsim_dev->switch_id.id_len);
+	INIT_LIST_HEAD(&nsim_dev->port_list);
+	mutex_init(&nsim_dev->port_list_lock);
+
+	nsim_dev->fib_data = nsim_fib_create();
+	if (IS_ERR(nsim_dev->fib_data)) {
+		err = PTR_ERR(nsim_dev->fib_data);
+		goto err_devlink_free;
+	}
+
+	err = nsim_dev_resources_register(devlink);
+	if (err)
+		goto err_fib_destroy;
+
+	err = devlink_register(devlink, &nsim_bus_dev->dev);
+	if (err)
+		goto err_resources_unregister;
+
+	err = nsim_dev_debugfs_init(nsim_dev);
+	if (err)
+		goto err_dl_unregister;
+
+	err = nsim_bpf_dev_init(nsim_dev);
+	if (err)
+		goto err_debugfs_exit;
+
+	return nsim_dev;
+
+err_debugfs_exit:
+	nsim_dev_debugfs_exit(nsim_dev);
+err_dl_unregister:
+	devlink_unregister(devlink);
+err_resources_unregister:
+	devlink_resources_unregister(devlink, NULL);
+err_fib_destroy:
+	nsim_fib_destroy(nsim_dev->fib_data);
+err_devlink_free:
+	/* pairs with mutex_init() above, as nsim_dev_destroy() does */
+	mutex_destroy(&nsim_dev->port_list_lock);
+	devlink_free(devlink);
+	return ERR_PTR(err);
+}
+
+/* Tear down a device instance in the reverse order of nsim_dev_create()
+ * and free the devlink instance that contains it.
+ */
+static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
+{
+ struct devlink *devlink = priv_to_devlink(nsim_dev);
+
+ nsim_bpf_dev_exit(nsim_dev);
+ nsim_dev_debugfs_exit(nsim_dev);
+ devlink_unregister(devlink);
+ devlink_resources_unregister(devlink, NULL);
+ nsim_fib_destroy(nsim_dev->fib_data);
+ mutex_destroy(&nsim_dev->port_list_lock);
+ /* nsim_dev is the devlink private area and is gone after this */
+ devlink_free(devlink);
+}
+
+/* Create one port: allocate it, register its devlink port (physical
+ * flavour, port number port_index + 1, the device's switch id), create its
+ * debugfs directory and its netdevsim instance, then link it into the
+ * device's port list.
+ *
+ * Does not take port_list_lock itself: nsim_dev_port_add() calls this with
+ * the lock held, nsim_dev_probe() calls it without.
+ *
+ * Returns 0 on success or a negative errno, in which case the completed
+ * steps are undone in reverse order.
+ */
+static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
+ unsigned int port_index)
+{
+ struct nsim_dev_port *nsim_dev_port;
+ struct devlink_port *devlink_port;
+ int err;
+
+ nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL);
+ if (!nsim_dev_port)
+ return -ENOMEM;
+ nsim_dev_port->port_index = port_index;
+
+ devlink_port = &nsim_dev_port->devlink_port;
+ devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+ port_index + 1, 0, 0,
+ nsim_dev->switch_id.id,
+ nsim_dev->switch_id.id_len);
+ err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port,
+ port_index);
+ if (err)
+ goto err_port_free;
+
+ err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port);
+ if (err)
+ goto err_dl_port_unregister;
+
+ nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port);
+ if (IS_ERR(nsim_dev_port->ns)) {
+ err = PTR_ERR(nsim_dev_port->ns);
+ goto err_port_debugfs_exit;
+ }
+
+ /* tie the devlink port to the netdev that nsim_create() produced */
+ devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev);
+ list_add(&nsim_dev_port->list, &nsim_dev->port_list);
+
+ return 0;
+
+err_port_debugfs_exit:
+ nsim_dev_port_debugfs_exit(nsim_dev_port);
+err_dl_port_unregister:
+ devlink_port_unregister(devlink_port);
+err_port_free:
+ kfree(nsim_dev_port);
+ return err;
+}
+
+/* Unlink a port from its device's list and undo __nsim_dev_port_add() in
+ * reverse order, freeing the port at the end. Takes no lock itself.
+ */
+static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port)
+{
+ struct devlink_port *devlink_port = &nsim_dev_port->devlink_port;
+
+ list_del(&nsim_dev_port->list);
+ devlink_port_type_clear(devlink_port);
+ nsim_destroy(nsim_dev_port->ns);
+ nsim_dev_port_debugfs_exit(nsim_dev_port);
+ devlink_port_unregister(devlink_port);
+ kfree(nsim_dev_port);
+}
+
+/* Delete every port on the device's port list. The _safe iterator is
+ * needed because __nsim_dev_port_del() unlinks and frees each entry.
+ */
+static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev)
+{
+ struct nsim_dev_port *nsim_dev_port, *tmp;
+
+ list_for_each_entry_safe(nsim_dev_port, tmp,
+ &nsim_dev->port_list, list)
+ __nsim_dev_port_del(nsim_dev_port);
+}
+
+/* Probe a bus device: create its nsim_dev, store it as driver data and
+ * add ports 0 .. port_count - 1.
+ *
+ * If adding a port fails, the ports created so far and the nsim_dev are
+ * destroyed and the error is returned. Returns 0 on success.
+ *
+ * NOTE(review): ports are added here without port_list_lock held -
+ * presumably nothing else can reach the list this early; confirm.
+ */
+int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
+{
+ struct nsim_dev *nsim_dev;
+ int i;
+ int err;
+
+ nsim_dev = nsim_dev_create(nsim_bus_dev, nsim_bus_dev->port_count);
+ if (IS_ERR(nsim_dev))
+ return PTR_ERR(nsim_dev);
+ dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev);
+
+ for (i = 0; i < nsim_bus_dev->port_count; i++) {
+ err = __nsim_dev_port_add(nsim_dev, i);
+ if (err)
+ goto err_port_del_all;
+ }
+ return 0;
+
+err_port_del_all:
+ nsim_dev_port_del_all(nsim_dev);
+ nsim_dev_destroy(nsim_dev);
+ return err;
+}
+
+/* Remove a bus device: delete all of its ports, then destroy the nsim_dev
+ * stored as its driver data.
+ */
+void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev)
+{
+ struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
+
+ nsim_dev_port_del_all(nsim_dev);
+ nsim_dev_destroy(nsim_dev);
+}
+
+/* Find the port with index @port_index on the device's port list.
+ * Returns the port, or NULL if there is none. Takes no lock itself.
+ */
+static struct nsim_dev_port *
+__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, unsigned int port_index)
+{
+	struct nsim_dev_port *port;
+
+	list_for_each_entry(port, &nsim_dev->port_list, list) {
+		if (port->port_index == port_index)
+			return port;
+	}
+
+	return NULL;
+}
+
+/* Add the port with index @port_index to the device behind @nsim_bus_dev.
+ *
+ * Lookup and creation happen under port_list_lock.
+ *
+ * Returns 0 on success, -EEXIST if a port with that index already exists,
+ * -ENODEV if the bus device has no nsim_dev attached, or the error from
+ * __nsim_dev_port_add().
+ */
+int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev,
+		      unsigned int port_index)
+{
+	struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
+	int err;
+
+	/* Driver data is only set by nsim_dev_probe(), while the sysfs
+	 * attribute leading here belongs to the device type. Do not
+	 * dereference a missing instance.
+	 */
+	if (!nsim_dev)
+		return -ENODEV;
+
+	mutex_lock(&nsim_dev->port_list_lock);
+	if (__nsim_dev_port_lookup(nsim_dev, port_index))
+		err = -EEXIST;
+	else
+		err = __nsim_dev_port_add(nsim_dev, port_index);
+	mutex_unlock(&nsim_dev->port_list_lock);
+	return err;
+}
+
+/* Delete the port with index @port_index from the device behind
+ * @nsim_bus_dev.
+ *
+ * Lookup and deletion happen under port_list_lock.
+ *
+ * Returns 0 on success, -ENOENT if no port has that index, or -ENODEV if
+ * the bus device has no nsim_dev attached.
+ */
+int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
+		      unsigned int port_index)
+{
+	struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
+	struct nsim_dev_port *nsim_dev_port;
+	int err = 0;
+
+	/* Driver data is only set by nsim_dev_probe(), while the sysfs
+	 * attribute leading here belongs to the device type. Do not
+	 * dereference a missing instance.
+	 */
+	if (!nsim_dev)
+		return -ENODEV;
+
+	mutex_lock(&nsim_dev->port_list_lock);
+	nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, port_index);
+	if (!nsim_dev_port)
+		err = -ENOENT;
+	else
+		__nsim_dev_port_del(nsim_dev_port);
+	mutex_unlock(&nsim_dev->port_list_lock);
+	return err;
+}
+
+/* Create the driver's top-level debugfs directory, named DRV_NAME, and
+ * remember it in nsim_dev_ddir. Returns 0 on success, -ENOMEM otherwise.
+ */
+int nsim_dev_init(void)
+{
+	struct dentry *root = debugfs_create_dir(DRV_NAME, NULL);
+
+	nsim_dev_ddir = root;
+	return IS_ERR_OR_NULL(root) ? -ENOMEM : 0;
+}
+
+/* Remove the driver's top-level debugfs directory and all of its
+ * contents.
+ */
+void nsim_dev_exit(void)
+{
+ debugfs_remove_recursive(nsim_dev_ddir);
+}
+++ /dev/null
-/*
- * Copyright (c) 2018 Cumulus Networks. All rights reserved.
- * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com>
- *
- * This software is licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree.
- *
- * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
- * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
- * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
- * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- */
-
-#include <linux/device.h>
-#include <net/devlink.h>
-#include <net/netns/generic.h>
-
-#include "netdevsim.h"
-
-static unsigned int nsim_devlink_id;
-
-/* place holder until devlink and namespaces is sorted out */
-static struct net *nsim_devlink_net(struct devlink *devlink)
-{
- return &init_net;
-}
-
-/* IPv4
- */
-static u64 nsim_ipv4_fib_resource_occ_get(void *priv)
-{
- struct net *net = priv;
-
- return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
-}
-
-static u64 nsim_ipv4_fib_rules_res_occ_get(void *priv)
-{
- struct net *net = priv;
-
- return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
-}
-
-/* IPv6
- */
-static u64 nsim_ipv6_fib_resource_occ_get(void *priv)
-{
- struct net *net = priv;
-
- return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
-}
-
-static u64 nsim_ipv6_fib_rules_res_occ_get(void *priv)
-{
- struct net *net = priv;
-
- return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
-}
-
-static int devlink_resources_register(struct devlink *devlink)
-{
- struct devlink_resource_size_params params = {
- .size_max = (u64)-1,
- .size_granularity = 1,
- .unit = DEVLINK_RESOURCE_UNIT_ENTRY
- };
- struct net *net = nsim_devlink_net(devlink);
- int err;
- u64 n;
-
- /* Resources for IPv4 */
- err = devlink_resource_register(devlink, "IPv4", (u64)-1,
- NSIM_RESOURCE_IPV4,
- DEVLINK_RESOURCE_ID_PARENT_TOP,
- &params);
- if (err) {
- pr_err("Failed to register IPv4 top resource\n");
- goto out;
- }
-
- n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
- err = devlink_resource_register(devlink, "fib", n,
- NSIM_RESOURCE_IPV4_FIB,
- NSIM_RESOURCE_IPV4, &params);
- if (err) {
- pr_err("Failed to register IPv4 FIB resource\n");
- return err;
- }
-
- n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
- err = devlink_resource_register(devlink, "fib-rules", n,
- NSIM_RESOURCE_IPV4_FIB_RULES,
- NSIM_RESOURCE_IPV4, &params);
- if (err) {
- pr_err("Failed to register IPv4 FIB rules resource\n");
- return err;
- }
-
- /* Resources for IPv6 */
- err = devlink_resource_register(devlink, "IPv6", (u64)-1,
- NSIM_RESOURCE_IPV6,
- DEVLINK_RESOURCE_ID_PARENT_TOP,
- &params);
- if (err) {
- pr_err("Failed to register IPv6 top resource\n");
- goto out;
- }
-
- n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
- err = devlink_resource_register(devlink, "fib", n,
- NSIM_RESOURCE_IPV6_FIB,
- NSIM_RESOURCE_IPV6, &params);
- if (err) {
- pr_err("Failed to register IPv6 FIB resource\n");
- return err;
- }
-
- n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
- err = devlink_resource_register(devlink, "fib-rules", n,
- NSIM_RESOURCE_IPV6_FIB_RULES,
- NSIM_RESOURCE_IPV6, &params);
- if (err) {
- pr_err("Failed to register IPv6 FIB rules resource\n");
- return err;
- }
-
- devlink_resource_occ_get_register(devlink,
- NSIM_RESOURCE_IPV4_FIB,
- nsim_ipv4_fib_resource_occ_get,
- net);
- devlink_resource_occ_get_register(devlink,
- NSIM_RESOURCE_IPV4_FIB_RULES,
- nsim_ipv4_fib_rules_res_occ_get,
- net);
- devlink_resource_occ_get_register(devlink,
- NSIM_RESOURCE_IPV6_FIB,
- nsim_ipv6_fib_resource_occ_get,
- net);
- devlink_resource_occ_get_register(devlink,
- NSIM_RESOURCE_IPV6_FIB_RULES,
- nsim_ipv6_fib_rules_res_occ_get,
- net);
-out:
- return err;
-}
-
-static int nsim_devlink_reload(struct devlink *devlink,
- struct netlink_ext_ack *extack)
-{
- enum nsim_resource_id res_ids[] = {
- NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
- NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
- };
- struct net *net = nsim_devlink_net(devlink);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
- int err;
- u64 val;
-
- err = devlink_resource_size_get(devlink, res_ids[i], &val);
- if (!err) {
- err = nsim_fib_set_max(net, res_ids[i], val, extack);
- if (err)
- return err;
- }
- }
-
- return 0;
-}
-
-static void nsim_devlink_net_reset(struct net *net)
-{
- enum nsim_resource_id res_ids[] = {
- NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
- NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
- if (nsim_fib_set_max(net, res_ids[i], (u64)-1, NULL)) {
- pr_err("Failed to reset limit for resource %u\n",
- res_ids[i]);
- }
- }
-}
-
-static const struct devlink_ops nsim_devlink_ops = {
- .reload = nsim_devlink_reload,
-};
-
-/* once devlink / namespace issues are sorted out
- * this needs to be net in which a devlink instance
- * is to be created. e.g., dev_net(ns->netdev)
- */
-static struct net *nsim_to_net(struct netdevsim *ns)
-{
- return &init_net;
-}
-
-void nsim_devlink_teardown(struct netdevsim *ns)
-{
- if (ns->devlink) {
- struct net *net = nsim_to_net(ns);
- bool *reg_devlink = net_generic(net, nsim_devlink_id);
-
- devlink_resources_unregister(ns->devlink, NULL);
- devlink_unregister(ns->devlink);
- devlink_free(ns->devlink);
- ns->devlink = NULL;
-
- nsim_devlink_net_reset(net);
- *reg_devlink = true;
- }
-}
-
-int nsim_devlink_setup(struct netdevsim *ns)
-{
- struct net *net = nsim_to_net(ns);
- bool *reg_devlink = net_generic(net, nsim_devlink_id);
- struct devlink *devlink;
- int err;
-
- /* only one device per namespace controls devlink */
- if (!*reg_devlink) {
- ns->devlink = NULL;
- return 0;
- }
-
- devlink = devlink_alloc(&nsim_devlink_ops, 0);
- if (!devlink)
- return -ENOMEM;
-
- err = devlink_register(devlink, &ns->dev);
- if (err)
- goto err_devlink_free;
-
- err = devlink_resources_register(devlink);
- if (err)
- goto err_dl_unregister;
-
- ns->devlink = devlink;
-
- *reg_devlink = false;
-
- return 0;
-
-err_dl_unregister:
- devlink_unregister(devlink);
-err_devlink_free:
- devlink_free(devlink);
-
- return err;
-}
-
-/* Initialize per network namespace state */
-static int __net_init nsim_devlink_netns_init(struct net *net)
-{
- bool *reg_devlink = net_generic(net, nsim_devlink_id);
-
- *reg_devlink = true;
-
- return 0;
-}
-
-static struct pernet_operations nsim_devlink_net_ops = {
- .init = nsim_devlink_netns_init,
- .id = &nsim_devlink_id,
- .size = sizeof(bool),
-};
-
-void nsim_devlink_exit(void)
-{
- unregister_pernet_subsys(&nsim_devlink_net_ops);
- nsim_fib_exit();
-}
-
-int nsim_devlink_init(void)
-{
- int err;
-
- err = nsim_fib_init();
- if (err)
- goto err_out;
-
- err = register_pernet_subsys(&nsim_devlink_net_ops);
- if (err)
- nsim_fib_exit();
-
-err_out:
- return err;
-}
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
-#include <net/netns/generic.h>
#include "netdevsim.h"
};
struct nsim_fib_data {
+ struct notifier_block fib_nb;
struct nsim_per_fib_data ipv4;
struct nsim_per_fib_data ipv6;
};
-static unsigned int nsim_fib_net_id;
-
-u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max)
+u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
+ enum nsim_resource_id res_id, bool max)
{
- struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id);
struct nsim_fib_entry *entry;
switch (res_id) {
return max ? entry->max : entry->num;
}
-int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
+int nsim_fib_set_max(struct nsim_fib_data *fib_data,
+ enum nsim_resource_id res_id, u64 val,
struct netlink_ext_ack *extack)
{
- struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id);
struct nsim_fib_entry *entry;
int err = 0;
return err;
}
-static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add)
+static int nsim_fib_rule_event(struct nsim_fib_data *data,
+ struct fib_notifier_info *info, bool add)
{
- struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id);
struct netlink_ext_ack *extack = info->extack;
int err = 0;
return err;
}
-static int nsim_fib_event(struct fib_notifier_info *info, bool add)
+static int nsim_fib_event(struct nsim_fib_data *data,
+ struct fib_notifier_info *info, bool add)
{
- struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id);
struct netlink_ext_ack *extack = info->extack;
int err = 0;
static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
void *ptr)
{
+ struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data,
+ fib_nb);
struct fib_notifier_info *info = ptr;
int err = 0;
switch (event) {
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
- err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD);
+ err = nsim_fib_rule_event(data, info,
+ event == FIB_EVENT_RULE_ADD);
break;
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
- err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD);
+ err = nsim_fib_event(data, info,
+ event == FIB_EVENT_ENTRY_ADD);
break;
}
/* inconsistent dump, trying again */
static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
{
- struct nsim_fib_data *data;
- struct net *net;
-
- rcu_read_lock();
- for_each_net_rcu(net) {
- data = net_generic(net, nsim_fib_net_id);
-
- data->ipv4.fib.num = 0ULL;
- data->ipv4.rules.num = 0ULL;
+ struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data,
+ fib_nb);
- data->ipv6.fib.num = 0ULL;
- data->ipv6.rules.num = 0ULL;
- }
- rcu_read_unlock();
+ data->ipv4.fib.num = 0ULL;
+ data->ipv4.rules.num = 0ULL;
+ data->ipv6.fib.num = 0ULL;
+ data->ipv6.rules.num = 0ULL;
}
-static struct notifier_block nsim_fib_nb = {
- .notifier_call = nsim_fib_event_nb,
-};
-
-/* Initialize per network namespace state */
-static int __net_init nsim_fib_netns_init(struct net *net)
+struct nsim_fib_data *nsim_fib_create(void)
{
- struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id);
+ struct nsim_fib_data *data;
+ int err;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
data->ipv4.fib.max = (u64)-1;
data->ipv4.rules.max = (u64)-1;
data->ipv6.fib.max = (u64)-1;
data->ipv6.rules.max = (u64)-1;
- return 0;
-}
-
-static struct pernet_operations nsim_fib_net_ops = {
- .init = nsim_fib_netns_init,
- .id = &nsim_fib_net_id,
- .size = sizeof(struct nsim_fib_data),
-};
-
-void nsim_fib_exit(void)
-{
- unregister_pernet_subsys(&nsim_fib_net_ops);
- unregister_fib_notifier(&nsim_fib_nb);
-}
-
-int nsim_fib_init(void)
-{
- int err;
-
- err = register_pernet_subsys(&nsim_fib_net_ops);
- if (err < 0) {
- pr_err("Failed to register pernet subsystem\n");
- goto err_out;
- }
-
- err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent);
- if (err < 0) {
+ data->fib_nb.notifier_call = nsim_fib_event_nb;
+ err = register_fib_notifier(&data->fib_nb, nsim_fib_dump_inconsistent);
+ if (err) {
pr_err("Failed to register fib notifier\n");
goto err_out;
}
+ return data;
+
err_out:
- return err;
+ kfree(data);
+ return ERR_PTR(err);
+}
+
+void nsim_fib_destroy(struct nsim_fib_data *data)
+{
+ unregister_fib_notifier(&data->fib_nb);
+ kfree(data);
}
ns->netdev->features |= NSIM_ESP_FEATURES;
ns->netdev->hw_enc_features |= NSIM_ESP_FEATURES;
- ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, ns->ddir, ns,
+ ns->ipsec.pfile = debugfs_create_file("ipsec", 0400,
+ ns->nsim_dev_port->ddir, ns,
&ipsec_dbg_fops);
}
#include "netdevsim.h"
-struct nsim_vf_config {
- int link_state;
- u16 min_tx_rate;
- u16 max_tx_rate;
- u16 vlan;
- __be16 vlan_proto;
- u16 qos;
- u8 vf_mac[ETH_ALEN];
- bool spoofchk_enabled;
- bool trusted;
- bool rss_query_enabled;
-};
-
-static u32 nsim_dev_id;
-
-static struct dentry *nsim_ddir;
-static struct dentry *nsim_sdev_ddir;
-
-static int nsim_num_vf(struct device *dev)
-{
- struct netdevsim *ns = to_nsim(dev);
-
- return ns->num_vfs;
-}
-
-static struct bus_type nsim_bus = {
- .name = DRV_NAME,
- .dev_name = DRV_NAME,
- .num_vf = nsim_num_vf,
-};
-
-static int nsim_vfs_enable(struct netdevsim *ns, unsigned int num_vfs)
-{
- ns->vfconfigs = kcalloc(num_vfs, sizeof(struct nsim_vf_config),
- GFP_KERNEL);
- if (!ns->vfconfigs)
- return -ENOMEM;
- ns->num_vfs = num_vfs;
-
- return 0;
-}
-
-static void nsim_vfs_disable(struct netdevsim *ns)
-{
- kfree(ns->vfconfigs);
- ns->vfconfigs = NULL;
- ns->num_vfs = 0;
-}
-
-static ssize_t
-nsim_numvfs_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct netdevsim *ns = to_nsim(dev);
- unsigned int num_vfs;
- int ret;
-
- ret = kstrtouint(buf, 0, &num_vfs);
- if (ret)
- return ret;
-
- rtnl_lock();
- if (ns->num_vfs == num_vfs)
- goto exit_good;
- if (ns->num_vfs && num_vfs) {
- ret = -EBUSY;
- goto exit_unlock;
- }
-
- if (num_vfs) {
- ret = nsim_vfs_enable(ns, num_vfs);
- if (ret)
- goto exit_unlock;
- } else {
- nsim_vfs_disable(ns);
- }
-exit_good:
- ret = count;
-exit_unlock:
- rtnl_unlock();
-
- return ret;
-}
-
-static ssize_t
-nsim_numvfs_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct netdevsim *ns = to_nsim(dev);
-
- return sprintf(buf, "%u\n", ns->num_vfs);
-}
-
-static struct device_attribute nsim_numvfs_attr =
- __ATTR(sriov_numvfs, 0664, nsim_numvfs_show, nsim_numvfs_store);
-
-static struct attribute *nsim_dev_attrs[] = {
- &nsim_numvfs_attr.attr,
- NULL,
-};
-
-static const struct attribute_group nsim_dev_attr_group = {
- .attrs = nsim_dev_attrs,
-};
-
-static const struct attribute_group *nsim_dev_attr_groups[] = {
- &nsim_dev_attr_group,
- NULL,
-};
-
-static void nsim_dev_release(struct device *dev)
-{
- struct netdevsim *ns = to_nsim(dev);
-
- nsim_vfs_disable(ns);
- free_netdev(ns->netdev);
-}
-
-static struct device_type nsim_dev_type = {
- .groups = nsim_dev_attr_groups,
- .release = nsim_dev_release,
-};
-
-static int nsim_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid)
-{
- struct netdevsim *ns = netdev_priv(dev);
-
- ppid->id_len = sizeof(ns->sdev->switch_id);
- memcpy(&ppid->id, &ns->sdev->switch_id, ppid->id_len);
- return 0;
-}
-
-static int nsim_init(struct net_device *dev)
-{
- char sdev_ddir_name[10], sdev_link_name[32];
- struct netdevsim *ns = netdev_priv(dev);
- int err;
-
- ns->netdev = dev;
- ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
- if (IS_ERR_OR_NULL(ns->ddir))
- return -ENOMEM;
-
- if (!ns->sdev) {
- ns->sdev = kzalloc(sizeof(*ns->sdev), GFP_KERNEL);
- if (!ns->sdev) {
- err = -ENOMEM;
- goto err_debugfs_destroy;
- }
- ns->sdev->refcnt = 1;
- ns->sdev->switch_id = nsim_dev_id;
- sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id);
- ns->sdev->ddir = debugfs_create_dir(sdev_ddir_name,
- nsim_sdev_ddir);
- if (IS_ERR_OR_NULL(ns->sdev->ddir)) {
- err = PTR_ERR_OR_ZERO(ns->sdev->ddir) ?: -EINVAL;
- goto err_sdev_free;
- }
- } else {
- sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id);
- ns->sdev->refcnt++;
- }
-
- sprintf(sdev_link_name, "../../" DRV_NAME "_sdev/%s", sdev_ddir_name);
- debugfs_create_symlink("sdev", ns->ddir, sdev_link_name);
-
- err = nsim_bpf_init(ns);
- if (err)
- goto err_sdev_destroy;
-
- ns->dev.id = nsim_dev_id++;
- ns->dev.bus = &nsim_bus;
- ns->dev.type = &nsim_dev_type;
- err = device_register(&ns->dev);
- if (err)
- goto err_bpf_uninit;
-
- SET_NETDEV_DEV(dev, &ns->dev);
-
- err = nsim_devlink_setup(ns);
- if (err)
- goto err_unreg_dev;
-
- nsim_ipsec_init(ns);
-
- return 0;
-
-err_unreg_dev:
- device_unregister(&ns->dev);
-err_bpf_uninit:
- nsim_bpf_uninit(ns);
-err_sdev_destroy:
- if (!--ns->sdev->refcnt) {
- debugfs_remove_recursive(ns->sdev->ddir);
-err_sdev_free:
- kfree(ns->sdev);
- }
-err_debugfs_destroy:
- debugfs_remove_recursive(ns->ddir);
- return err;
-}
-
-static void nsim_uninit(struct net_device *dev)
-{
- struct netdevsim *ns = netdev_priv(dev);
-
- nsim_ipsec_teardown(ns);
- nsim_devlink_teardown(ns);
- debugfs_remove_recursive(ns->ddir);
- nsim_bpf_uninit(ns);
- if (!--ns->sdev->refcnt) {
- debugfs_remove_recursive(ns->sdev->ddir);
- kfree(ns->sdev);
- }
-}
-
-static void nsim_free(struct net_device *dev)
-{
- struct netdevsim *ns = netdev_priv(dev);
-
- device_unregister(&ns->dev);
- /* netdev and vf state will be freed out of device_release() */
-}
-
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct netdevsim *ns = netdev_priv(dev);
static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
/* Only refuse multicast addresses, zero address can mean unset/any. */
- if (vf >= ns->num_vfs || is_multicast_ether_addr(mac))
+ if (vf >= nsim_bus_dev->num_vfs || is_multicast_ether_addr(mac))
return -EINVAL;
- memcpy(ns->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
+ memcpy(nsim_bus_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
return 0;
}
u16 vlan, u8 qos, __be16 vlan_proto)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs || vlan > 4095 || qos > 7)
+ if (vf >= nsim_bus_dev->num_vfs || vlan > 4095 || qos > 7)
return -EINVAL;
- ns->vfconfigs[vf].vlan = vlan;
- ns->vfconfigs[vf].qos = qos;
- ns->vfconfigs[vf].vlan_proto = vlan_proto;
+ nsim_bus_dev->vfconfigs[vf].vlan = vlan;
+ nsim_bus_dev->vfconfigs[vf].qos = qos;
+ nsim_bus_dev->vfconfigs[vf].vlan_proto = vlan_proto;
return 0;
}
static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs)
+ if (vf >= nsim_bus_dev->num_vfs)
return -EINVAL;
- ns->vfconfigs[vf].min_tx_rate = min;
- ns->vfconfigs[vf].max_tx_rate = max;
+ nsim_bus_dev->vfconfigs[vf].min_tx_rate = min;
+ nsim_bus_dev->vfconfigs[vf].max_tx_rate = max;
return 0;
}
static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs)
+ if (vf >= nsim_bus_dev->num_vfs)
return -EINVAL;
- ns->vfconfigs[vf].spoofchk_enabled = val;
+ nsim_bus_dev->vfconfigs[vf].spoofchk_enabled = val;
return 0;
}
static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs)
+ if (vf >= nsim_bus_dev->num_vfs)
return -EINVAL;
- ns->vfconfigs[vf].rss_query_enabled = val;
+ nsim_bus_dev->vfconfigs[vf].rss_query_enabled = val;
return 0;
}
static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs)
+ if (vf >= nsim_bus_dev->num_vfs)
return -EINVAL;
- ns->vfconfigs[vf].trusted = val;
+ nsim_bus_dev->vfconfigs[vf].trusted = val;
return 0;
}
nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs)
+ if (vf >= nsim_bus_dev->num_vfs)
return -EINVAL;
ivi->vf = vf;
- ivi->linkstate = ns->vfconfigs[vf].link_state;
- ivi->min_tx_rate = ns->vfconfigs[vf].min_tx_rate;
- ivi->max_tx_rate = ns->vfconfigs[vf].max_tx_rate;
- ivi->vlan = ns->vfconfigs[vf].vlan;
- ivi->vlan_proto = ns->vfconfigs[vf].vlan_proto;
- ivi->qos = ns->vfconfigs[vf].qos;
- memcpy(&ivi->mac, ns->vfconfigs[vf].vf_mac, ETH_ALEN);
- ivi->spoofchk = ns->vfconfigs[vf].spoofchk_enabled;
- ivi->trusted = ns->vfconfigs[vf].trusted;
- ivi->rss_query_en = ns->vfconfigs[vf].rss_query_enabled;
+ ivi->linkstate = nsim_bus_dev->vfconfigs[vf].link_state;
+ ivi->min_tx_rate = nsim_bus_dev->vfconfigs[vf].min_tx_rate;
+ ivi->max_tx_rate = nsim_bus_dev->vfconfigs[vf].max_tx_rate;
+ ivi->vlan = nsim_bus_dev->vfconfigs[vf].vlan;
+ ivi->vlan_proto = nsim_bus_dev->vfconfigs[vf].vlan_proto;
+ ivi->qos = nsim_bus_dev->vfconfigs[vf].qos;
+ memcpy(&ivi->mac, nsim_bus_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
+ ivi->spoofchk = nsim_bus_dev->vfconfigs[vf].spoofchk_enabled;
+ ivi->trusted = nsim_bus_dev->vfconfigs[vf].trusted;
+ ivi->rss_query_en = nsim_bus_dev->vfconfigs[vf].rss_query_enabled;
return 0;
}
static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
- if (vf >= ns->num_vfs)
+ if (vf >= nsim_bus_dev->num_vfs)
return -EINVAL;
switch (state) {
return -EINVAL;
}
- ns->vfconfigs[vf].link_state = state;
+ nsim_bus_dev->vfconfigs[vf].link_state = state;
return 0;
}
return 0;
}
+/* ndo_get_devlink_port callback: hand back the devlink port embedded in the
+ * nsim_dev_port this netdev was created for.
+ */
+static struct devlink_port *nsim_get_devlink_port(struct net_device *dev)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_dev_port *port = ns->nsim_dev_port;
+
+ return &port->devlink_port;
+}
+
static const struct net_device_ops nsim_netdev_ops = {
- .ndo_init = nsim_init,
- .ndo_uninit = nsim_uninit,
.ndo_start_xmit = nsim_start_xmit,
.ndo_set_rx_mode = nsim_set_rx_mode,
.ndo_set_mac_address = eth_mac_addr,
.ndo_setup_tc = nsim_setup_tc,
.ndo_set_features = nsim_set_features,
.ndo_bpf = nsim_bpf,
- .ndo_get_port_parent_id = nsim_get_port_parent_id,
+ .ndo_get_devlink_port = nsim_get_devlink_port,
};
static void nsim_setup(struct net_device *dev)
ether_setup(dev);
eth_hw_addr_random(dev);
- dev->netdev_ops = &nsim_netdev_ops;
- dev->priv_destructor = nsim_free;
-
dev->tx_queue_len = 0;
dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
dev->max_mtu = ETH_MAX_MTU;
}
-static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
+/* Allocate a netdev for the given nsim_dev / nsim_dev_port pair, initialise
+ * its BPF and IPsec state and register it.
+ *
+ * Returns the new netdevsim instance, or an ERR_PTR() on failure. RTNL is
+ * taken internally and is released again on every exit path.
+ */
+struct netdevsim *
+nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
{
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
- return -EINVAL;
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
- return -EADDRNOTAVAIL;
- }
- return 0;
+ struct net_device *dev;
+ struct netdevsim *ns;
+ int err;
+
+ dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ ns = netdev_priv(dev);
+ ns->netdev = dev;
+ ns->nsim_dev = nsim_dev;
+ ns->nsim_dev_port = nsim_dev_port;
+ ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
+ SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
+ dev->netdev_ops = &nsim_netdev_ops;
+
+ rtnl_lock();
+ err = nsim_bpf_init(ns);
+ if (err)
+ goto err_rtnl_unlock; /* RTNL is already held: must drop it */
+
+ nsim_ipsec_init(ns);
+
+ err = register_netdevice(dev);
+ if (err)
+ goto err_ipsec_teardown;
+ rtnl_unlock();
+
+ return ns;
+
+err_ipsec_teardown:
+ nsim_ipsec_teardown(ns);
+ nsim_bpf_uninit(ns);
+err_rtnl_unlock:
+ rtnl_unlock();
+ free_netdev(dev);
+ return ERR_PTR(err);
}
-static int nsim_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
+void nsim_destroy(struct netdevsim *ns)
{
- struct netdevsim *ns = netdev_priv(dev);
-
- if (tb[IFLA_LINK]) {
- struct net_device *joindev;
- struct netdevsim *joinns;
-
- joindev = __dev_get_by_index(src_net,
- nla_get_u32(tb[IFLA_LINK]));
- if (!joindev)
- return -ENODEV;
- if (joindev->netdev_ops != &nsim_netdev_ops)
- return -EINVAL;
-
- joinns = netdev_priv(joindev);
- if (!joinns->sdev || !joinns->sdev->refcnt)
- return -EINVAL;
- ns->sdev = joinns->sdev;
- }
+ struct net_device *dev = ns->netdev;
- return register_netdevice(dev);
+ rtnl_lock();
+ unregister_netdevice(dev);
+ nsim_ipsec_teardown(ns);
+ nsim_bpf_uninit(ns);
+ rtnl_unlock();
+ free_netdev(dev);
}
-static void nsim_dellink(struct net_device *dev, struct list_head *head)
+static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
- unregister_netdevice_queue(dev, head);
+ NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device");
+ return -EOPNOTSUPP;
}
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
.kind = DRV_NAME,
- .priv_size = sizeof(struct netdevsim),
- .setup = nsim_setup,
.validate = nsim_validate,
- .newlink = nsim_newlink,
- .dellink = nsim_dellink,
};
static int __init nsim_module_init(void)
{
int err;
- nsim_ddir = debugfs_create_dir(DRV_NAME, NULL);
- if (IS_ERR_OR_NULL(nsim_ddir))
- return -ENOMEM;
-
- nsim_sdev_ddir = debugfs_create_dir(DRV_NAME "_sdev", NULL);
- if (IS_ERR_OR_NULL(nsim_sdev_ddir)) {
- err = -ENOMEM;
- goto err_debugfs_destroy;
- }
-
- err = bus_register(&nsim_bus);
+ err = nsim_dev_init();
if (err)
- goto err_sdir_destroy;
+ return err;
- err = nsim_devlink_init();
+ err = nsim_bus_init();
if (err)
- goto err_unreg_bus;
+ goto err_dev_exit;
err = rtnl_link_register(&nsim_link_ops);
if (err)
- goto err_dl_fini;
+ goto err_bus_exit;
return 0;
-err_dl_fini:
- nsim_devlink_exit();
-err_unreg_bus:
- bus_unregister(&nsim_bus);
-err_sdir_destroy:
- debugfs_remove_recursive(nsim_sdev_ddir);
-err_debugfs_destroy:
- debugfs_remove_recursive(nsim_ddir);
+err_bus_exit:
+ nsim_bus_exit();
+err_dev_exit:
+ nsim_dev_exit();
return err;
}
static void __exit nsim_module_exit(void)
{
rtnl_link_unregister(&nsim_link_ops);
- nsim_devlink_exit();
- bus_unregister(&nsim_bus);
- debugfs_remove_recursive(nsim_sdev_ddir);
- debugfs_remove_recursive(nsim_ddir);
+ nsim_bus_exit();
+ nsim_dev_exit();
}
module_init(nsim_module_init);
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/u64_stats_sync.h>
+#include <net/devlink.h>
#include <net/xdp.h>
#define DRV_NAME "netdevsim"
#define NSIM_EA(extack, msg) NL_SET_ERR_MSG_MOD((extack), msg)
-struct bpf_prog;
-struct bpf_offload_dev;
-struct dentry;
-struct nsim_vf_config;
-
-struct netdevsim_shared_dev {
- unsigned int refcnt;
- u32 switch_id;
-
- struct dentry *ddir;
-
- struct bpf_offload_dev *bpf_dev;
-
- struct dentry *ddir_bpf_bound_progs;
- u32 prog_id_gen;
-
- struct list_head bpf_bound_progs;
- struct list_head bpf_bound_maps;
-};
-
#define NSIM_IPSEC_MAX_SA_COUNT 33
#define NSIM_IPSEC_VALID BIT(31)
struct netdevsim {
struct net_device *netdev;
+ struct nsim_dev *nsim_dev;
+ struct nsim_dev_port *nsim_dev_port;
u64 tx_packets;
u64 tx_bytes;
struct u64_stats_sync syncp;
- struct device dev;
- struct netdevsim_shared_dev *sdev;
-
- struct dentry *ddir;
-
- unsigned int num_vfs;
- struct nsim_vf_config *vfconfigs;
+ struct nsim_bus_dev *nsim_bus_dev;
struct bpf_prog *bpf_offloaded;
u32 bpf_offloaded_id;
struct xdp_attachment_info xdp;
struct xdp_attachment_info xdp_hw;
- bool bpf_bind_accept;
- u32 bpf_bind_verifier_delay;
-
bool bpf_tc_accept;
bool bpf_tc_non_bound_accept;
bool bpf_xdpdrv_accept;
bool bpf_xdpoffload_accept;
bool bpf_map_accept;
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
- struct devlink *devlink;
-#endif
struct nsim_ipsec ipsec;
};
+struct netdevsim *
+nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
+void nsim_destroy(struct netdevsim *ns);
+
#ifdef CONFIG_BPF_SYSCALL
+int nsim_bpf_dev_init(struct nsim_dev *nsim_dev);
+void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev);
int nsim_bpf_init(struct netdevsim *ns);
void nsim_bpf_uninit(struct netdevsim *ns);
int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf);
int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
void *type_data, void *cb_priv);
#else
+
+static inline int nsim_bpf_dev_init(struct nsim_dev *nsim_dev)
+{
+ return 0;
+}
+
+static inline void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev)
+{
+}
static inline int nsim_bpf_init(struct netdevsim *ns)
{
return 0;
}
#endif
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
enum nsim_resource_id {
NSIM_RESOURCE_NONE, /* DEVLINK_RESOURCE_ID_PARENT_TOP */
NSIM_RESOURCE_IPV4,
NSIM_RESOURCE_IPV6_FIB_RULES,
};
-int nsim_devlink_setup(struct netdevsim *ns);
-void nsim_devlink_teardown(struct netdevsim *ns);
+struct nsim_dev_port {
+ struct list_head list;
+ struct devlink_port devlink_port;
+ unsigned int port_index;
+ struct dentry *ddir;
+ struct netdevsim *ns;
+};
-int nsim_devlink_init(void);
-void nsim_devlink_exit(void);
+struct nsim_dev {
+ struct nsim_bus_dev *nsim_bus_dev;
+ struct nsim_fib_data *fib_data;
+ struct dentry *ddir;
+ struct dentry *ports_ddir;
+ struct bpf_offload_dev *bpf_dev;
+ bool bpf_bind_accept;
+ u32 bpf_bind_verifier_delay;
+ struct dentry *ddir_bpf_bound_progs;
+ u32 prog_id_gen;
+ struct list_head bpf_bound_progs;
+ struct list_head bpf_bound_maps;
+ struct netdev_phys_item_id switch_id;
+ struct list_head port_list;
+ struct mutex port_list_lock; /* protects port list */
+};
-int nsim_fib_init(void);
-void nsim_fib_exit(void);
-u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
-int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
+int nsim_dev_init(void);
+void nsim_dev_exit(void);
+int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev);
+void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev);
+int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev,
+ unsigned int port_index);
+int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
+ unsigned int port_index);
+
+struct nsim_fib_data *nsim_fib_create(void);
+void nsim_fib_destroy(struct nsim_fib_data *fib_data);
+u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
+ enum nsim_resource_id res_id, bool max);
+int nsim_fib_set_max(struct nsim_fib_data *fib_data,
+ enum nsim_resource_id res_id, u64 val,
struct netlink_ext_ack *extack);
-#else
-static inline int nsim_devlink_setup(struct netdevsim *ns)
-{
- return 0;
-}
-
-static inline void nsim_devlink_teardown(struct netdevsim *ns)
-{
-}
-
-static inline int nsim_devlink_init(void)
-{
- return 0;
-}
-
-static inline void nsim_devlink_exit(void)
-{
-}
-#endif
#if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
void nsim_ipsec_init(struct netdevsim *ns);
}
#endif
-static inline struct netdevsim *to_nsim(struct device *ptr)
-{
- return container_of(ptr, struct netdevsim, dev);
-}
+struct nsim_vf_config {
+ int link_state;
+ u16 min_tx_rate;
+ u16 max_tx_rate;
+ u16 vlan;
+ __be16 vlan_proto;
+ u16 qos;
+ u8 vf_mac[ETH_ALEN];
+ bool spoofchk_enabled;
+ bool trusted;
+ bool rss_query_enabled;
+};
+
+struct nsim_bus_dev {
+ struct device dev;
+ struct list_head list;
+ unsigned int port_count;
+ unsigned int num_vfs;
+ struct nsim_vf_config *vfconfigs;
+};
+
+int nsim_bus_init(void);
+void nsim_bus_exit(void);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "netdevsim.h"
+
+static struct dentry *nsim_sdev_ddir;
+
+static u32 nsim_sdev_id;
+
+/* Get a reference on a shared device.
+ *
+ * When joinns is given, its existing shared device is reused and its
+ * refcount is incremented. Otherwise a new shared device is allocated,
+ * assigned the next switch id and given a debugfs directory named after
+ * that id.
+ *
+ * Returns the shared device, or an ERR_PTR() on failure.
+ */
+struct netdevsim_shared_dev *nsim_sdev_get(struct netdevsim *joinns)
+{
+ struct netdevsim_shared_dev *sdev;
+ char sdev_ddir_name[11]; /* up to 10 digits of a u32 plus the NUL */
+ int err;
+
+ if (joinns) {
+ if (WARN_ON(!joinns->sdev))
+ return ERR_PTR(-EINVAL);
+ sdev = joinns->sdev;
+ sdev->refcnt++;
+ return sdev;
+ }
+
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev)
+ return ERR_PTR(-ENOMEM);
+ sdev->refcnt = 1;
+ sdev->switch_id = nsim_sdev_id++;
+
+ snprintf(sdev_ddir_name, sizeof(sdev_ddir_name), "%u", sdev->switch_id);
+ sdev->ddir = debugfs_create_dir(sdev_ddir_name, nsim_sdev_ddir);
+ if (IS_ERR_OR_NULL(sdev->ddir)) {
+ err = PTR_ERR_OR_ZERO(sdev->ddir) ?: -EINVAL;
+ goto err_sdev_free;
+ }
+
+ return sdev;
+
+err_sdev_free:
+ /* give the unused switch id back */
+ nsim_sdev_id--;
+ kfree(sdev);
+ return ERR_PTR(err);
+}
+
+/* Drop one reference on a shared device; when the count reaches zero its
+ * debugfs directory is removed and the structure is freed.
+ */
+void nsim_sdev_put(struct netdevsim_shared_dev *sdev)
+{
+ sdev->refcnt--;
+ if (sdev->refcnt == 0) {
+ debugfs_remove_recursive(sdev->ddir);
+ kfree(sdev);
+ }
+}
+
+/* Create the top-level debugfs directory under which the per-shared-device
+ * directories are created. Returns 0 on success, -ENOMEM otherwise.
+ */
+int nsim_sdev_init(void)
+{
+ struct dentry *dir = debugfs_create_dir(DRV_NAME "_sdev", NULL);
+
+ nsim_sdev_ddir = dir;
+ return IS_ERR_OR_NULL(dir) ? -ENOMEM : 0;
+}
+
+void nsim_sdev_exit(void)
+{
+ debugfs_remove_recursive(nsim_sdev_ddir);
+}
config MICROSEMI_PHY
tristate "Microsemi PHYs"
---help---
- Currently supports VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
+ Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
config NATIONAL_PHY
tristate "National Semiconductor PHYs"
.phy_id = PHY_ID_AM79C874,
.name = "AM79C874",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = am79c_config_init,
.ack_interrupt = am79c_ack_interrupt,
.config_intr = am79c_config_intr,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQ1202),
.name = "Aquantia AQ1202",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQ2104),
.name = "Aquantia AQ2104",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR105),
.name = "Aquantia AQR105",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR106),
.name = "Aquantia AQR106",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR107),
.name = "Aquantia AQR107",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.probe = aqr107_probe,
.config_init = aqr107_config_init,
.config_aneg = aqr_config_aneg,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQCS109),
.name = "Aquantia AQCS109",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.probe = aqr107_probe,
.config_init = aqcs109_config_init,
.config_aneg = aqr_config_aneg,
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR405),
.name = "Aquantia AQR405",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
.phy_id = PHY_ID_ASIX_AX88796B,
.name = "Asix Electronics AX88796B",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.soft_reset = asix_soft_reset,
} };
* in the FIFO. In such cases, the FIFO enters an error mode it
* cannot recover from by software.
*/
- if (phydev->state == PHY_NOLINK && phydev->mdio.reset) {
+ if (phydev->state == PHY_NOLINK && phydev->mdio.reset_gpio) {
struct at803x_context context;
at803x_context_save(phydev, &context);
.get_wol = at803x_get_wol,
.suspend = at803x_suspend,
.resume = at803x_resume,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.ack_interrupt = at803x_ack_interrupt,
.config_intr = at803x_config_intr,
}, {
.get_wol = at803x_get_wol,
.suspend = at803x_suspend,
.resume = at803x_resume,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.ack_interrupt = at803x_ack_interrupt,
.config_intr = at803x_config_intr,
}, {
.get_wol = at803x_get_wol,
.suspend = at803x_suspend,
.resume = at803x_resume,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.aneg_done = at803x_aneg_done,
.ack_interrupt = &at803x_ack_interrupt,
.config_intr = &at803x_config_intr,
.phy_id = PHY_ID_BCM_CYGNUS,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom Cygnus PHY",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm_cygnus_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM_OMEGA,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom Omega Combo GPHY",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.flags = PHY_IS_INTERNAL,
.config_init = bcm_omega_config_init,
.suspend = genphy_suspend,
.phy_id = 0x00406000,
.phy_id_mask = 0xfffffc00,
.name = "Broadcom BCM63XX (1)",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = PHY_IS_INTERNAL,
.config_init = bcm63xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
/* same phy as above, with just a different OUI */
.phy_id = 0x002bdc00,
.phy_id_mask = 0xfffffc00,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = PHY_IS_INTERNAL,
.config_init = bcm63xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.phy_id = (_oui), \
.phy_id_mask = 0xfffffff0, \
.name = _name, \
- .features = PHY_GBIT_FEATURES, \
+ /* PHY_GBIT_FEATURES */ \
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_28nm_config_init, \
.resume = bcm7xxx_28nm_resume, \
.phy_id = (_oui), \
.phy_id_mask = 0xfffffff0, \
.name = _name, \
- .features = PHY_BASIC_FEATURES, \
+ /* PHY_BASIC_FEATURES */ \
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_28nm_ephy_config_init, \
.resume = bcm7xxx_28nm_ephy_resume, \
.phy_id = (_oui), \
.phy_id_mask = 0xfffffff0, \
.name = _name, \
- .features = PHY_BASIC_FEATURES, \
+ /* PHY_BASIC_FEATURES */ \
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_config_init, \
.suspend = bcm7xxx_suspend, \
.phy_id = PHY_ID_BCM5411,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5411",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM5421,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5421",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM54210E,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM54210E",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM5461,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5461",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM54612E,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM54612E",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM54616S,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM54616S",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.config_aneg = bcm54616s_config_aneg,
.ack_interrupt = bcm_phy_ack_intr,
.phy_id = PHY_ID_BCM5464,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5464",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM5481,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5481",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.config_aneg = bcm5481_config_aneg,
.ack_interrupt = bcm_phy_ack_intr,
.phy_id = PHY_ID_BCM54810,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM54810",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.config_aneg = bcm5481_config_aneg,
.ack_interrupt = bcm_phy_ack_intr,
.phy_id = PHY_ID_BCM5482,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5482",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm5482_config_init,
.read_status = bcm5482_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.phy_id = PHY_ID_BCM50610,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM50610",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM50610M,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM50610M",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCM57780,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM57780",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = PHY_ID_BCMAC131,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCMAC131",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = brcm_fet_config_init,
.ack_interrupt = brcm_fet_ack_interrupt,
.config_intr = brcm_fet_config_intr,
.phy_id = PHY_ID_BCM5241,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5241",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = brcm_fet_config_init,
.ack_interrupt = brcm_fet_ack_interrupt,
.config_intr = brcm_fet_config_intr,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5395",
.flags = PHY_IS_INTERNAL,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.get_sset_count = bcm_phy_get_sset_count,
.get_strings = bcm_phy_get_strings,
.get_stats = bcm53xx_phy_get_stats,
.phy_id = PHY_ID_BCM89610,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM89610",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = bcm54xx_config_init,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.phy_id = 0x000fc410,
.name = "Cicada Cis8201",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &cis820x_config_init,
.ack_interrupt = &cis820x_ack_interrupt,
.config_intr = &cis820x_config_intr,
.phy_id = 0x000fc440,
.name = "Cicada Cis8204",
.phy_id_mask = 0x000fffc0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &cis820x_config_init,
.ack_interrupt = &cis820x_ack_interrupt,
.config_intr = &cis820x_config_intr,
.phy_id = 0x0181b880,
.name = "Davicom DM9161E",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = dm9161_config_init,
.config_aneg = dm9161_config_aneg,
.ack_interrupt = dm9161_ack_interrupt,
.phy_id = 0x0181b8b0,
.name = "Davicom DM9161B/C",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = dm9161_config_init,
.config_aneg = dm9161_config_aneg,
.ack_interrupt = dm9161_ack_interrupt,
.phy_id = 0x0181b8a0,
.name = "Davicom DM9161A",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = dm9161_config_init,
.config_aneg = dm9161_config_aneg,
.ack_interrupt = dm9161_ack_interrupt,
.phy_id = 0x00181b80,
.name = "Davicom DM9131",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.ack_interrupt = dm9161_ack_interrupt,
.config_intr = dm9161_config_intr,
} };
.phy_id = DP83640_PHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "NatSemi DP83640",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = dp83640_probe,
.remove = dp83640_remove,
.soft_reset = dp83640_soft_reset,
{ \
PHY_ID_MATCH_MODEL(_id), \
.name = (_name), \
- .features = PHY_BASIC_FEATURES, \
+ /* PHY_BASIC_FEATURES */ \
.soft_reset = dp83822_phy_reset, \
.config_init = dp83822_config_init, \
.get_wol = dp83822_get_wol, \
.phy_id = _id, \
.phy_id_mask = 0xfffffff0, \
.name = _name, \
- .features = PHY_BASIC_FEATURES, \
+ /* PHY_BASIC_FEATURES */ \
\
.soft_reset = genphy_soft_reset, \
.config_init = _config_init, \
.phy_id = DP83867_PHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "TI DP83867",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = dp83867_config_init,
.soft_reset = dp83867_phy_reset,
.phy_id = DP83TC811_PHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "TI DP83TC811",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = dp83811_config_init,
.config_aneg = dp83811_config_aneg,
.soft_reset = dp83811_phy_reset,
.phy_id = 0x0282f014,
.name = "ET1011C",
.phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_aneg = et1011c_config_aneg,
.read_status = et1011c_read_status,
} };
.phy_id = 0x02430d80,
.name = "ICPlus IP175C",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = &ip175c_config_init,
.config_aneg = &ip175c_config_aneg,
.read_status = &ip175c_read_status,
.phy_id = 0x02430d90,
.name = "ICPlus IP1001",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &ip1001_config_init,
.suspend = genphy_suspend,
.resume = genphy_resume,
.phy_id = 0x02430c54,
.name = "ICPlus IP101A/G",
.phy_id_mask = 0x0ffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = ip101a_g_probe,
.config_intr = ip101a_g_config_intr,
.did_interrupt = ip101a_g_did_interrupt,
.phy_id = PHY_ID_PHY11G_1_3,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY11G (PEF 7071/PEF 7072) v1.3",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
.ack_interrupt = xway_gphy_ack_interrupt,
.phy_id = PHY_ID_PHY22F_1_3,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY22F (PEF 7061) v1.3",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
.ack_interrupt = xway_gphy_ack_interrupt,
.phy_id = PHY_ID_PHY11G_1_4,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY11G (PEF 7071/PEF 7072) v1.4",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
.ack_interrupt = xway_gphy_ack_interrupt,
.phy_id = PHY_ID_PHY22F_1_4,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY22F (PEF 7061) v1.4",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
.ack_interrupt = xway_gphy_ack_interrupt,
.phy_id = PHY_ID_PHY11G_1_5,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY11G (PEF 7071/PEF 7072) v1.5 / v1.6",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = xway_gphy_config_init,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.phy_id = PHY_ID_PHY22F_1_5,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY22F (PEF 7061) v1.5 / v1.6",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = xway_gphy_config_init,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.phy_id = PHY_ID_PHY11G_VR9_1_1,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY11G (xRX v1.1 integrated)",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = xway_gphy_config_init,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.phy_id = PHY_ID_PHY22F_VR9_1_1,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY22F (xRX v1.1 integrated)",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = xway_gphy_config_init,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.phy_id = PHY_ID_PHY11G_VR9_1_2,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY11G (xRX v1.2 integrated)",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = xway_gphy_config_init,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.phy_id = PHY_ID_PHY22F_VR9_1_2,
.phy_id_mask = 0xffffffff,
.name = "Intel XWAY PHY22F (xRX v1.2 integrated)",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = xway_gphy_config_init,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.phy_id = 0x78100000,
.name = "LXT970",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = lxt970_config_init,
.ack_interrupt = lxt970_ack_interrupt,
.config_intr = lxt970_config_intr,
.phy_id = 0x001378e0,
.name = "LXT971",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.ack_interrupt = lxt971_ack_interrupt,
.config_intr = lxt971_config_intr,
}, {
.phy_id = 0x00137a10,
.name = "LXT973-A2",
.phy_id_mask = 0xffffffff,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = 0,
.probe = lxt973_probe,
.config_aneg = lxt973_config_aneg,
.phy_id = 0x00137a10,
.name = "LXT973",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = 0,
.probe = lxt973_probe,
.config_aneg = lxt973_config_aneg,
#define MII_PHY_LED_CTRL 16
#define MII_88E1121_PHY_LED_DEF 0x0030
#define MII_88E1510_PHY_LED_DEF 0x1177
+#define MII_88E1510_PHY_LED0_LINK_LED1_ACTIVE 0x1040
#define MII_M1011_PHY_STATUS 0x11
#define MII_M1011_PHY_STATUS_1000 0x8000
* LED[2] .. Blink, Activity
*/
case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1510):
- def_config = MII_88E1510_PHY_LED_DEF;
+ if (phydev->dev_flags & MARVELL_PHY_LED0_LINK_LED1_ACTIVE)
+ def_config = MII_88E1510_PHY_LED0_LINK_LED1_ACTIVE;
+ else
+ def_config = MII_88E1510_PHY_LED_DEF;
break;
default:
return;
.phy_id = MARVELL_PHY_ID_88E1101,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1101",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &marvell_config_init,
.config_aneg = &m88e1101_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1112,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1112",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1111_config_init,
.config_aneg = &marvell_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1111,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1111",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1111_config_init,
.config_aneg = &marvell_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1118,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1118",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1118_config_init,
.config_aneg = &m88e1118_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1121R,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1121R",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = &m88e1121_probe,
.config_init = &marvell_config_init,
.config_aneg = &m88e1121_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1318S,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1318S",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1318_config_init,
.config_aneg = &m88e1318_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1145,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1145",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1145_config_init,
.config_aneg = &m88e1101_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1149R,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1149R",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1149_config_init,
.config_aneg = &m88e1118_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1240,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1240",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1111_config_init,
.config_aneg = &marvell_config_aneg,
.phy_id = MARVELL_PHY_ID_88E1116R,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1116R",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = marvell_probe,
.config_init = &m88e1116r_config_init,
.ack_interrupt = &marvell_ack_interrupt,
.phy_id = MARVELL_PHY_ID_88E1540,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1540",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = m88e1510_probe,
.config_init = &marvell_config_init,
.config_aneg = &m88e1510_config_aneg,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1545",
.probe = m88e1510_probe,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &marvell_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
.phy_id = MARVELL_PHY_ID_88E3016,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E3016",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = marvell_probe,
.config_init = &m88e3016_config_init,
.aneg_done = &marvell_aneg_done,
.phy_id = MARVELL_PHY_ID_88E6390,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E6390",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = m88e6390_probe,
.config_init = &marvell_config_init,
.config_aneg = &m88e6390_config_aneg,
.phy_id = MARVELL_PHY_ID_88E2110,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "mv88x2110",
- .get_features = genphy_c45_pma_read_abilities,
.probe = mv3310_probe,
.suspend = mv3310_suspend,
.resume = mv3310_resume,
#include <linux/of_gpio.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/reset.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
return PTR_ERR(gpiod);
}
- mdiodev->reset = gpiod;
+ mdiodev->reset_gpio = gpiod;
- /* Assert the reset signal again */
- mdio_device_reset(mdiodev, 1);
+ return 0;
+}
+
+static int mdiobus_register_reset(struct mdio_device *mdiodev)
+{
+ struct reset_control *reset = NULL;
+
+ if (mdiodev->dev.of_node)
+ reset = devm_reset_control_get_exclusive(&mdiodev->dev,
+ "phy");
+ if (PTR_ERR(reset) == -ENOENT ||
+ PTR_ERR(reset) == -ENOTSUPP)
+ reset = NULL;
+ else if (IS_ERR(reset))
+ return PTR_ERR(reset);
+
+ mdiodev->reset_ctrl = reset;
return 0;
}
err = mdiobus_register_gpiod(mdiodev);
if (err)
return err;
+
+ err = mdiobus_register_reset(mdiodev);
+ if (err)
+ return err;
+
+ /* Assert the reset signal */
+ mdio_device_reset(mdiodev, 1);
}
mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev;
if (!mdiodev)
continue;
- if (mdiodev->reset)
- gpiod_put(mdiodev->reset);
+ if (mdiodev->reset_gpio)
+ gpiod_put(mdiodev->reset_gpio);
mdiodev->device_remove(mdiodev);
mdiodev->device_free(mdiodev);
#include <linux/mii.h>
#include <linux/module.h>
#include <linux/phy.h>
+#include <linux/reset.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/unistd.h>
{
unsigned int d;
- if (!mdiodev->reset)
+ if (!mdiodev->reset_gpio && !mdiodev->reset_ctrl)
return;
- gpiod_set_value(mdiodev->reset, value);
+ if (mdiodev->reset_gpio)
+ gpiod_set_value(mdiodev->reset_gpio, value);
+
+ if (mdiodev->reset_ctrl) {
+ if (value)
+ reset_control_assert(mdiodev->reset_ctrl);
+ else
+ reset_control_deassert(mdiodev->reset_ctrl);
+ }
d = value ? mdiodev->reset_assert_delay : mdiodev->reset_deassert_delay;
if (d)
{
PHY_ID_MATCH_EXACT(0x01814400),
.name = "Meson GXL Internal PHY",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = PHY_IS_INTERNAL,
.soft_reset = genphy_soft_reset,
.config_init = meson_gxl_config_init,
}, {
PHY_ID_MATCH_EXACT(0x01803301),
.name = "Meson G12A Internal PHY",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = PHY_IS_INTERNAL,
.soft_reset = genphy_soft_reset,
.ack_interrupt = meson_gxl_ack_interrupt,
return 0;
}
+static int ksz9031_get_features(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_read_abilities(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* Silicon Errata Sheet (DS80000691D or DS80000692D):
+ * Whenever the device's Asymmetric Pause capability is set to 1,
+ * link-up may fail after a link-up to link-down transition.
+ *
+ * Workaround:
+ * Do not enable the Asymmetric Pause capability bit.
+ */
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported);
+
+ /* We force setting the Pause capability as the core will force the
+ * Asymmetric Pause capability to 1 otherwise.
+ */
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported);
+
+ return 0;
+}
+
static int ksz9031_read_status(struct phy_device *phydev)
{
int err;
.phy_id = PHY_ID_KS8737,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KS8737",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ks8737_type,
.config_init = kszphy_config_init,
.ack_interrupt = kszphy_ack_interrupt,
.phy_id = PHY_ID_KSZ8021,
.phy_id_mask = 0x00ffffff,
.name = "Micrel KSZ8021 or KSZ8031",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8021_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
.phy_id = PHY_ID_KSZ8031,
.phy_id_mask = 0x00ffffff,
.name = "Micrel KSZ8031",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8021_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
.phy_id = PHY_ID_KSZ8041,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8041",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8041_type,
.probe = kszphy_probe,
.config_init = ksz8041_config_init,
.phy_id = PHY_ID_KSZ8041RNLI,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8041RNLI",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8041_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
.phy_id = PHY_ID_KSZ8051,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8051",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8051_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
.phy_id = PHY_ID_KSZ8001,
.name = "Micrel KSZ8001 or KS8721",
.phy_id_mask = 0x00fffffc,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8041_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
.phy_id = PHY_ID_KSZ8081,
.name = "Micrel KSZ8081 or KSZ8091",
.phy_id_mask = MICREL_PHY_ID_MASK,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.driver_data = &ksz8081_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
.phy_id = PHY_ID_KSZ8061,
.name = "Micrel KSZ8061",
.phy_id_mask = MICREL_PHY_ID_MASK,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = ksz8061_config_init,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.phy_id = PHY_ID_KSZ9021,
.phy_id_mask = 0x000ffffe,
.name = "Micrel KSZ9021 Gigabit PHY",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9021_config_init,
.phy_id = PHY_ID_KSZ9031,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ9031 Gigabit PHY",
- .features = PHY_GBIT_FEATURES,
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
+ .get_features = ksz9031_get_features,
.config_init = ksz9031_config_init,
.soft_reset = genphy_soft_reset,
.read_status = ksz9031_read_status,
.phy_id = PHY_ID_KSZ9131,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Microchip KSZ9131 Gigabit PHY",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9131_config_init,
.phy_id = PHY_ID_KSZ8873MLL,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8873MLL Switch",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
.phy_id = PHY_ID_KSZ886X,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ886X Switch",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
.suspend = genphy_suspend,
.resume = genphy_resume,
.phy_id = PHY_ID_KSZ8795,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8795",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
.phy_id = PHY_ID_KSZ9477,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Microchip KSZ9477",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = kszphy_config_init,
.suspend = genphy_suspend,
.resume = genphy_resume,
.phy_id_mask = 0xfffffff0,
.name = "Microchip LAN88xx",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.probe = lan88xx_probe,
.remove = lan88xx_remove,
#define LED_MODE_SEL_MASK(x) (GENMASK(3, 0) << LED_MODE_SEL_POS(x))
#define LED_MODE_SEL(x, mode) (((mode) << LED_MODE_SEL_POS(x)) & LED_MODE_SEL_MASK(x))
+#define MSCC_EXT_PAGE_CSR_CNTL_17 17
+#define MSCC_EXT_PAGE_CSR_CNTL_18 18
+
+#define MSCC_EXT_PAGE_CSR_CNTL_19 19
+#define MSCC_PHY_CSR_CNTL_19_REG_ADDR(x) (x)
+#define MSCC_PHY_CSR_CNTL_19_TARGET(x) ((x) << 12)
+#define MSCC_PHY_CSR_CNTL_19_READ BIT(14)
+#define MSCC_PHY_CSR_CNTL_19_CMD BIT(15)
+
+#define MSCC_EXT_PAGE_CSR_CNTL_20 20
+#define MSCC_PHY_CSR_CNTL_20_TARGET(x) (x)
+
+#define PHY_MCB_TARGET 0x07
+#define PHY_MCB_S6G_WRITE BIT(31)
+#define PHY_MCB_S6G_READ BIT(30)
+
+#define PHY_S6G_PLL5G_CFG0 0x06
+#define PHY_S6G_LCPLL_CFG 0x11
+#define PHY_S6G_PLL_CFG 0x2b
+#define PHY_S6G_COMMON_CFG 0x2c
+#define PHY_S6G_GPC_CFG 0x2e
+#define PHY_S6G_MISC_CFG 0x3b
+#define PHY_MCB_S6G_CFG 0x3f
+#define PHY_S6G_DFT_CFG2 0x3e
+#define PHY_S6G_PLL_STATUS 0x31
+#define PHY_S6G_IB_STATUS0 0x2f
+
+#define PHY_S6G_SYS_RST_POS 31
+#define PHY_S6G_ENA_LANE_POS 18
+#define PHY_S6G_ENA_LOOP_POS 8
+#define PHY_S6G_QRATE_POS 6
+#define PHY_S6G_IF_MODE_POS 4
+#define PHY_S6G_PLL_ENA_OFFS_POS 21
+#define PHY_S6G_PLL_FSM_CTRL_DATA_POS 8
+#define PHY_S6G_PLL_FSM_ENA_POS 7
+
#define MSCC_EXT_PAGE_ACCESS 31
#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */
#define MSCC_PHY_PAGE_EXTENDED 0x0001 /* Extended registers */
#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */
#define MSCC_PHY_PAGE_EXTENDED_3 0x0003 /* Extended reg - page 3 */
#define MSCC_PHY_PAGE_EXTENDED_4 0x0004 /* Extended reg - page 4 */
+#define MSCC_PHY_PAGE_CSR_CNTL MSCC_PHY_PAGE_EXTENDED_4
/* Extended reg - GPIO; this is a bank of registers that are shared for all PHYs
* in the same package.
*/
#define MSCC_PHY_TR_MSB 18
/* Microsemi PHY ID's */
+#define PHY_ID_VSC8514 0x00070670
#define PHY_ID_VSC8530 0x00070560
#define PHY_ID_VSC8531 0x00070570
#define PHY_ID_VSC8540 0x00070760
return (rc < 0) ? 0 : rc & MII_VSC85XX_INT_MASK_MASK;
}
+/* Apply the package-wide VSC8514 register overrides.
+ *
+ * SMI broadcast writes are enabled while the override table is written and
+ * disabled again before returning. The results of the individual register
+ * accesses are not checked; the function always returns 0.
+ */
+static int vsc8514_config_pre_init(struct phy_device *phydev)
+{
+ /* These are the settings to override the silicon default
+ * values to handle hardware performance of PHY. They
+ * are set at Power-On state and remain until PHY Reset.
+ */
+ const struct reg_val pre_init1[] = {
+ {0x0f90, 0x00688980},
+ {0x0786, 0x00000003},
+ {0x07fa, 0x0050100f},
+ {0x0f82, 0x0012b002},
+ {0x1686, 0x00000004},
+ {0x168c, 0x00d2c46f},
+ {0x17a2, 0x00000620},
+ {0x16a0, 0x00eeffdd},
+ {0x16a6, 0x00071448},
+ {0x16a4, 0x0013132f},
+ {0x16a8, 0x00000000},
+ {0x0ffc, 0x00c0a028},
+ {0x0fe8, 0x0091b06c},
+ {0x0fea, 0x00041600},
+ {0x0f80, 0x00fffaff},
+ {0x0fec, 0x00901809},
+ {0x0ffe, 0x00b01007},
+ {0x16b0, 0x00eeff00},
+ {0x16b2, 0x00007000},
+ {0x16b4, 0x00000814},
+ };
+ unsigned int i;
+ u16 reg;
+
+ phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
+
+ /* all writes below are broadcasted to all PHYs in the same package */
+ reg = phy_base_read(phydev, MSCC_PHY_EXT_CNTL_STATUS);
+ reg |= SMI_BROADCAST_WR_EN;
+ phy_base_write(phydev, MSCC_PHY_EXT_CNTL_STATUS, reg);
+
+ phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_TEST);
+
+ /* NOTE(review): bit 15 of MSCC_PHY_TEST_PAGE_8 is set for the duration
+ * of the table writes and cleared again afterwards; its meaning is not
+ * visible here - confirm against the datasheet.
+ */
+ reg = phy_base_read(phydev, MSCC_PHY_TEST_PAGE_8);
+ reg |= BIT(15);
+ phy_base_write(phydev, MSCC_PHY_TEST_PAGE_8, reg);
+
+ phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_TR);
+
+ /* Write every {reg, val} override pair from the table above */
+ for (i = 0; i < ARRAY_SIZE(pre_init1); i++)
+ vsc8584_csr_write(phydev, pre_init1[i].reg, pre_init1[i].val);
+
+ phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_TEST);
+
+ reg = phy_base_read(phydev, MSCC_PHY_TEST_PAGE_8);
+ reg &= ~BIT(15);
+ phy_base_write(phydev, MSCC_PHY_TEST_PAGE_8, reg);
+
+ phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
+
+ /* Turn broadcast writes off again before returning */
+ reg = phy_base_read(phydev, MSCC_PHY_EXT_CNTL_STATUS);
+ reg &= ~SMI_BROADCAST_WR_EN;
+ phy_base_write(phydev, MSCC_PHY_EXT_CNTL_STATUS, reg);
+
+ return 0;
+}
+
+/* Read a 32-bit CSR register through the CSR control page.
+ *
+ * @phydev: PHY device used for the base-PHY register accesses
+ * @target: 6-bit CSR Target ID
+ * @reg: register address within the target
+ *
+ * Returns the register value, or 0xffffffff if the read command did not
+ * complete before the timeout. The standard register page is selected again
+ * before returning, on the timeout path as well as on success, so a failed
+ * access does not leave the CSR control page selected.
+ */
+static u32 vsc85xx_csr_ctrl_phy_read(struct phy_device *phydev,
+				     u32 target, u32 reg)
+{
+	unsigned long deadline;
+	u32 val, val_l, val_h;
+	u32 ret = 0xffffffff;
+
+	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_CSR_CNTL);
+
+	/* CSR registers are grouped under different Target IDs.
+	 * 6-bit Target_ID is split between MSCC_EXT_PAGE_CSR_CNTL_20 and
+	 * MSCC_EXT_PAGE_CSR_CNTL_19 registers.
+	 * Target_ID[5:2] maps to bits[3:0] of MSCC_EXT_PAGE_CSR_CNTL_20
+	 * and Target_ID[1:0] maps to bits[13:12] of MSCC_EXT_PAGE_CSR_CNTL_19.
+	 */
+
+	/* Setup the Target ID */
+	phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_20,
+		       MSCC_PHY_CSR_CNTL_20_TARGET(target >> 2));
+
+	/* Trigger CSR Action - Read into the CSR's */
+	phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_19,
+		       MSCC_PHY_CSR_CNTL_19_CMD | MSCC_PHY_CSR_CNTL_19_READ |
+		       MSCC_PHY_CSR_CNTL_19_REG_ADDR(reg) |
+		       MSCC_PHY_CSR_CNTL_19_TARGET(target & 0x3));
+
+	/* Wait for register access */
+	deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS);
+	do {
+		usleep_range(500, 1000);
+		val = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_19);
+	} while (time_before(jiffies, deadline) &&
+		 !(val & MSCC_PHY_CSR_CNTL_19_CMD));
+
+	/* Timed out: report the error sentinel, but still restore the page */
+	if (!(val & MSCC_PHY_CSR_CNTL_19_CMD))
+		goto out;
+
+	/* Read the Least Significant Word (LSW) (17) */
+	val_l = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_17);
+
+	/* Read the Most Significant Word (MSW) (18) */
+	val_h = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_18);
+
+	ret = (val_h << 16) | val_l;
+
+out:
+	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+		       MSCC_PHY_PAGE_STANDARD);
+
+	return ret;
+}
+
+/* Write a 32-bit value to a CSR register through the CSR control page.
+ *
+ * @phydev: PHY device used for the base-PHY register accesses
+ * @target: 6-bit CSR Target ID
+ * @reg: register address within the target
+ * @val: 32-bit value to write
+ *
+ * Returns 0 on success or -ETIMEDOUT if the write command did not complete
+ * before the timeout. The standard register page is selected again before
+ * returning, on the timeout path as well as on success, so a failed access
+ * does not leave the CSR control page selected.
+ */
+static int vsc85xx_csr_ctrl_phy_write(struct phy_device *phydev,
+				      u32 target, u32 reg, u32 val)
+{
+	unsigned long deadline;
+	u32 status;
+	int ret = 0;
+
+	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_CSR_CNTL);
+
+	/* CSR registers are grouped under different Target IDs.
+	 * 6-bit Target_ID is split between MSCC_EXT_PAGE_CSR_CNTL_20 and
+	 * MSCC_EXT_PAGE_CSR_CNTL_19 registers.
+	 * Target_ID[5:2] maps to bits[3:0] of MSCC_EXT_PAGE_CSR_CNTL_20
+	 * and Target_ID[1:0] maps to bits[13:12] of MSCC_EXT_PAGE_CSR_CNTL_19.
+	 */
+
+	/* Setup the Target ID */
+	phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_20,
+		       MSCC_PHY_CSR_CNTL_20_TARGET(target >> 2));
+
+	/* Write the Least Significant Word (LSW) (17) */
+	phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_17, (u16)val);
+
+	/* Write the Most Significant Word (MSW) (18) */
+	phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_18, (u16)(val >> 16));
+
+	/* Trigger CSR Action - Write into the CSR's */
+	phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_19,
+		       MSCC_PHY_CSR_CNTL_19_CMD |
+		       MSCC_PHY_CSR_CNTL_19_REG_ADDR(reg) |
+		       MSCC_PHY_CSR_CNTL_19_TARGET(target & 0x3));
+
+	/* Wait for register access; the poll result is kept in its own
+	 * variable so the value being written is not overwritten.
+	 */
+	deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS);
+	do {
+		usleep_range(500, 1000);
+		status = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_19);
+	} while (time_before(jiffies, deadline) &&
+		 !(status & MSCC_PHY_CSR_CNTL_19_CMD));
+
+	if (!(status & MSCC_PHY_CSR_CNTL_19_CMD))
+		ret = -ETIMEDOUT;
+
+	/* Restore the standard page on both the success and timeout paths */
+	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+		       MSCC_PHY_PAGE_STANDARD);
+
+	return ret;
+}
+
+/* Issue an MCB operation and wait for the hardware to clear the op bit.
+ *
+ * @phydev: PHY device
+ * @reg: MCB configuration register within PHY_MCB_TARGET
+ * @mcb: index of the MCB to address; selects bit @mcb of the register
+ * @op: operation bit to set (PHY_MCB_S6G_READ or PHY_MCB_S6G_WRITE)
+ *
+ * Returns 0 on success, the error from the CSR write if the operation could
+ * not be issued, -EIO if the status read-back failed, or -ETIMEDOUT if the
+ * op bit is still set when the timeout expires.
+ */
+static int __phy_write_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb,
+			       u32 op)
+{
+	unsigned long deadline;
+	u32 val;
+	int ret;
+
+	/* BIT() keeps the shift unsigned, unlike a plain "1 << mcb" */
+	ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, reg,
+					 op | BIT(mcb));
+	if (ret)
+		return ret; /* propagate the real cause instead of -EINVAL */
+
+	deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS);
+	do {
+		usleep_range(500, 1000);
+		val = vsc85xx_csr_ctrl_phy_read(phydev, PHY_MCB_TARGET, reg);
+
+		/* 0xffffffff is the CSR read helper's failure sentinel */
+		if (val == 0xffffffff)
+			return -EIO;
+
+	} while (time_before(jiffies, deadline) && (val & op));
+
+	if (val & op)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/* Trigger a read to the specified MCB: issues PHY_MCB_S6G_READ on @reg for
+ * MCB index @mcb and returns the result of __phy_write_mcb_s6g().
+ */
+static int phy_update_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb)
+{
+ return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_READ);
+}
+
+/* Trigger a write to the specified MCB: issues PHY_MCB_S6G_WRITE on @reg for
+ * MCB index @mcb and returns the result of __phy_write_mcb_s6g().
+ */
+static int phy_commit_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb)
+{
+ return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_WRITE);
+}
+
+/* Initialise a VSC8514 PHY.
+ *
+ * Computes the package base address, runs the package-wide pre-init if the
+ * package has not been initialised yet, selects QSGMII in the MAC
+ * configuration, programs the PHY_S6G_* registers through the CSR/MCB
+ * helpers and polls their status registers, then selects copper media,
+ * soft-resets the PHY and applies the LED modes.
+ *
+ * Returns 0 on success or a negative errno. The MDIO bus lock is taken
+ * before the first register access and released on every exit path.
+ */
+static int vsc8514_config_init(struct phy_device *phydev)
+{
+ struct vsc8531_private *vsc8531 = phydev->priv;
+ unsigned long deadline;
+ u16 val, addr;
+ int ret, i;
+ u32 reg;
+
+ phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+
+ mutex_lock(&phydev->mdio.bus->mdio_lock);
+
+ __phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED);
+
+ addr = __phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_4);
+ addr >>= PHY_CNTL_4_ADDR_POS;
+
+ val = __phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
+
+ /* Derive the package base address from this PHY's MDIO address and
+ * the offset read above; the sign depends on PHY_ADDR_REVERSED.
+ */
+ if (val & PHY_ADDR_REVERSED)
+ vsc8531->base_addr = phydev->mdio.addr + addr;
+ else
+ vsc8531->base_addr = phydev->mdio.addr - addr;
+
+ /* Some parts of the init sequence are identical for every PHY in the
+ * package. Some parts are modifying the GPIO register bank which is a
+ * set of registers that are affecting all PHYs, a few resetting the
+ * microprocessor common to all PHYs.
+ * All PHYs' interrupts mask register has to be zeroed before enabling
+ * any PHY's interrupt in this register.
+ * For all these reasons, we need to do the init sequence once and only
+ * once whatever is the first PHY in the package that is initialized and
+ * do the correct init sequence for all PHYs that are package-critical
+ * in this pre-init function.
+ */
+ if (!vsc8584_is_pkg_init(phydev, val & PHY_ADDR_REVERSED ? 1 : 0))
+ vsc8514_config_pre_init(phydev);
+
+ vsc8531->pkg_init = true;
+
+ phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+ MSCC_PHY_PAGE_EXTENDED_GPIO);
+
+ val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK);
+
+ /* Select QSGMII in the MAC configuration field */
+ val &= ~MAC_CFG_MASK;
+ val |= MAC_CFG_QSGMII;
+ ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val);
+
+ if (ret)
+ goto err;
+
+ ret = vsc8584_cmd(phydev,
+ PROC_CMD_MCB_ACCESS_MAC_CONF |
+ PROC_CMD_RST_CONF_PORT |
+ PROC_CMD_READ_MOD_WRITE_PORT | PROC_CMD_QSGMII_MAC);
+ if (ret)
+ goto err;
+
+ /* NOTE(review): the return values of the phy_update_mcb_s6g() and
+ * phy_commit_mcb_s6g() calls in this function are not checked.
+ */
+ /* 6g mcb */
+ phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0);
+ /* lcpll mcb */
+ phy_update_mcb_s6g(phydev, PHY_S6G_LCPLL_CFG, 0);
+ /* pll5gcfg0 */
+ ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET,
+ PHY_S6G_PLL5G_CFG0, 0x7036f145);
+ if (ret)
+ goto err;
+
+ phy_commit_mcb_s6g(phydev, PHY_S6G_LCPLL_CFG, 0);
+ /* pllcfg */
+ ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET,
+ PHY_S6G_PLL_CFG,
+ (3 << PHY_S6G_PLL_ENA_OFFS_POS) |
+ (120 << PHY_S6G_PLL_FSM_CTRL_DATA_POS)
+ | (0 << PHY_S6G_PLL_FSM_ENA_POS));
+ if (ret)
+ goto err;
+
+ /* commoncfg */
+ ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET,
+ PHY_S6G_COMMON_CFG,
+ (0 << PHY_S6G_SYS_RST_POS) |
+ (0 << PHY_S6G_ENA_LANE_POS) |
+ (0 << PHY_S6G_ENA_LOOP_POS) |
+ (0 << PHY_S6G_QRATE_POS) |
+ (3 << PHY_S6G_IF_MODE_POS));
+ if (ret)
+ goto err;
+
+ /* misccfg */
+ ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET,
+ PHY_S6G_MISC_CFG, 1);
+ if (ret)
+ goto err;
+
+ /* gpcfg */
+ ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET,
+ PHY_S6G_GPC_CFG, 768);
+ if (ret)
+ goto err;
+
+ phy_commit_mcb_s6g(phydev, PHY_S6G_DFT_CFG2, 0);
+
+ /* Poll until bit 12 of PHY_S6G_PLL_STATUS clears or the timeout
+ * expires; a 0xffffffff read-back is treated as an I/O error.
+ */
+ deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS);
+ do {
+ usleep_range(500, 1000);
+ phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG,
+ 0); /* read 6G MCB into CSRs */
+ reg = vsc85xx_csr_ctrl_phy_read(phydev, PHY_MCB_TARGET,
+ PHY_S6G_PLL_STATUS);
+ if (reg == 0xffffffff) {
+ mutex_unlock(&phydev->mdio.bus->mdio_lock);
+ return -EIO;
+ }
+
+ } while (time_before(jiffies, deadline) && (reg & BIT(12)));
+
+ if (reg & BIT(12)) {
+ mutex_unlock(&phydev->mdio.bus->mdio_lock);
+ return -ETIMEDOUT;
+ }
+
+ /* misccfg */
+ ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET,
+ PHY_S6G_MISC_CFG, 0);
+ if (ret)
+ goto err;
+
+ phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0);
+
+ /* Poll until bit 8 of PHY_S6G_IB_STATUS0 is set or the timeout
+ * expires; a 0xffffffff read-back is treated as an I/O error.
+ */
+ deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS);
+ do {
+ usleep_range(500, 1000);
+ phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG,
+ 0); /* read 6G MCB into CSRs */
+ reg = vsc85xx_csr_ctrl_phy_read(phydev, PHY_MCB_TARGET,
+ PHY_S6G_IB_STATUS0);
+ if (reg == 0xffffffff) {
+ mutex_unlock(&phydev->mdio.bus->mdio_lock);
+ return -EIO;
+ }
+
+ } while (time_before(jiffies, deadline) && !(reg & BIT(8)));
+
+ if (!(reg & BIT(8))) {
+ mutex_unlock(&phydev->mdio.bus->mdio_lock);
+ return -ETIMEDOUT;
+ }
+
+ mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+ /* From here on the regular phy_write()/phy_modify() accessors are
+ * used, after the bus lock taken above has been dropped.
+ */
+ ret = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
+
+ if (ret)
+ return ret;
+
+ ret = phy_modify(phydev, MSCC_PHY_EXT_PHY_CNTL_1, MEDIA_OP_MODE_MASK,
+ MEDIA_OP_MODE_COPPER);
+
+ if (ret)
+ return ret;
+
+ ret = genphy_soft_reset(phydev);
+
+ if (ret)
+ return ret;
+
+ /* Apply the configured mode to each LED, stopping at the first error */
+ for (i = 0; i < vsc8531->nleds; i++) {
+ ret = vsc85xx_led_cntl_set(phydev, i, vsc8531->leds_mode[i]);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+
+err:
+ /* Error exit for failures that occur while the bus lock is held */
+ mutex_unlock(&phydev->mdio.bus->mdio_lock);
+ return ret;
+}
+
static int vsc85xx_ack_interrupt(struct phy_device *phydev)
{
int rc = 0;
return genphy_read_status(phydev);
}
+/* Allocate and populate the driver-private state for a VSC8514.
+ *
+ * Sets up four LEDs with their default modes, attaches the shared hardware
+ * statistics description and allocates the per-counter storage. Both
+ * allocations are device-managed, so there is nothing to free on failure.
+ *
+ * Returns -ENOMEM if an allocation fails, otherwise the result of
+ * vsc85xx_dt_led_modes_get().
+ */
+static int vsc8514_probe(struct phy_device *phydev)
+{
+	struct vsc8531_private *vsc8531;
+	u32 default_mode[4] = {VSC8531_LINK_1000_ACTIVITY,
+	   VSC8531_LINK_100_ACTIVITY, VSC8531_LINK_ACTIVITY,
+	   VSC8531_DUPLEX_COLLISION};
+
+	vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL);
+	if (!vsc8531)
+		return -ENOMEM;
+
+	phydev->priv = vsc8531;
+
+	vsc8531->nleds = 4;
+	vsc8531->supp_led_modes = VSC85XX_SUPP_LED_MODES;
+	vsc8531->hw_stats = vsc85xx_hw_stats;
+	vsc8531->nstats = ARRAY_SIZE(vsc85xx_hw_stats);
+	/* Zero the counter storage so that any code reading or accumulating
+	 * into it starts from a defined value rather than stale heap data.
+	 */
+	vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats,
+				      sizeof(u64), GFP_KERNEL);
+	if (!vsc8531->stats)
+		return -ENOMEM;
+
+	return vsc85xx_dt_led_modes_get(phydev, default_mode);
+}
+
static int vsc8574_probe(struct phy_device *phydev)
{
struct vsc8531_private *vsc8531;
/* Microsemi VSC85xx PHYs */
static struct phy_driver vsc85xx_driver[] = {
+{
+ .phy_id = PHY_ID_VSC8514,
+ .name = "Microsemi GE VSC8514 SyncE",
+ .phy_id_mask = 0xfffffff0,
+ .soft_reset = &genphy_soft_reset,
+ .config_init = &vsc8514_config_init,
+ .config_aneg = &vsc85xx_config_aneg,
+ .read_status = &vsc85xx_read_status,
+ .ack_interrupt = &vsc85xx_ack_interrupt,
+ .config_intr = &vsc85xx_config_intr,
+ .suspend = &genphy_suspend,
+ .resume = &genphy_resume,
+ .probe = &vsc8514_probe,
+ .set_wol = &vsc85xx_wol_set,
+ .get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
+ .read_page = &vsc85xx_phy_read_page,
+ .write_page = &vsc85xx_phy_write_page,
+ .get_sset_count = &vsc85xx_get_sset_count,
+ .get_strings = &vsc85xx_get_strings,
+ .get_stats = &vsc85xx_get_stats,
+},
{
.phy_id = PHY_ID_VSC8530,
.name = "Microsemi FE VSC8530",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
.phy_id = PHY_ID_VSC8531,
.name = "Microsemi VSC8531",
.phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
.phy_id = PHY_ID_VSC8540,
.name = "Microsemi FE VSC8540 SyncE",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
.phy_id = PHY_ID_VSC8541,
.name = "Microsemi VSC8541 SyncE",
.phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
.phy_id = PHY_ID_VSC8574,
.name = "Microsemi GE VSC8574 SyncE",
.phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc8584_config_init,
.config_aneg = &vsc85xx_config_aneg,
.phy_id = PHY_ID_VSC8584,
.name = "Microsemi GE VSC8584 SyncE",
.phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc8584_config_init,
.config_aneg = &vsc85xx_config_aneg,
module_phy_driver(vsc85xx_driver);
static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+ { PHY_ID_VSC8514, 0xfffffff0, },
{ PHY_ID_VSC8530, 0xfffffff0, },
{ PHY_ID_VSC8531, 0xfffffff0, },
{ PHY_ID_VSC8540, 0xfffffff0, },
.phy_id = DP83865_PHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "NatSemi DP83865",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = ns_config_init,
.ack_interrupt = ns_ack_interrupt,
.config_intr = ns_config_intr,
}
EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
-{
- /* For now just lie and say it's 10G all the time */
- phydev->speed = SPEED_10000;
- phydev->duplex = DUPLEX_FULL;
-
- return genphy_c45_read_link(phydev);
-}
-
-struct phy_driver genphy_10g_driver = {
+struct phy_driver genphy_c45_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
- .name = "Generic 10G PHY",
+ .name = "Generic Clause 45 PHY",
.soft_reset = genphy_no_soft_reset,
- .features = PHY_10GBIT_FEATURES,
- .config_aneg = gen10g_config_aneg,
- .read_status = gen10g_read_status,
+ .read_status = genphy_c45_read_status,
};
const char *phy_speed_to_str(int speed)
{
+ BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 67,
+ "Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
+ "If a speed or mode has been added please update phy_speed_to_str "
+ "and the PHY settings array.\n");
+
switch (speed) {
case SPEED_10:
return "10Mbps";
return "56Gbps";
case SPEED_100000:
return "100Gbps";
+ case SPEED_200000:
+ return "200Gbps";
case SPEED_UNKNOWN:
return "Unknown";
default:
/* A mapping of all SUPPORTED settings to speed/duplex. This table
* must be grouped by speed and sorted in descending match priority
* - iow, descending speed. */
+
+#define PHY_SETTING(s, d, b) { .speed = SPEED_ ## s, .duplex = DUPLEX_ ## d, \
+ .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT}
+
static const struct phy_setting settings[] = {
+ /* 200G */
+ PHY_SETTING( 200000, FULL, 200000baseCR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseKR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseLR4_ER4_FR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseDR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseSR4_Full ),
/* 100G */
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
- },
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
- },
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
- },
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
- },
+ PHY_SETTING( 100000, FULL, 100000baseCR4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseKR4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseLR4_ER4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseSR4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseCR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseKR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseLR2_ER2_FR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseDR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseSR2_Full ),
/* 56G */
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT,
- },
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT,
- },
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT,
- },
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT,
- },
+ PHY_SETTING( 56000, FULL, 56000baseCR4_Full ),
+ PHY_SETTING( 56000, FULL, 56000baseKR4_Full ),
+ PHY_SETTING( 56000, FULL, 56000baseLR4_Full ),
+ PHY_SETTING( 56000, FULL, 56000baseSR4_Full ),
/* 50G */
- {
- .speed = SPEED_50000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
- },
- {
- .speed = SPEED_50000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
- },
- {
- .speed = SPEED_50000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
- },
+ PHY_SETTING( 50000, FULL, 50000baseCR2_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseKR2_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseSR2_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseCR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseKR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseLR_ER_FR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseDR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseSR_Full ),
/* 40G */
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
- },
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
- },
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
- },
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
- },
+ PHY_SETTING( 40000, FULL, 40000baseCR4_Full ),
+ PHY_SETTING( 40000, FULL, 40000baseKR4_Full ),
+ PHY_SETTING( 40000, FULL, 40000baseLR4_Full ),
+ PHY_SETTING( 40000, FULL, 40000baseSR4_Full ),
/* 25G */
- {
- .speed = SPEED_25000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
- },
- {
- .speed = SPEED_25000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
- },
- {
- .speed = SPEED_25000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
- },
-
+ PHY_SETTING( 25000, FULL, 25000baseCR_Full ),
+ PHY_SETTING( 25000, FULL, 25000baseKR_Full ),
+ PHY_SETTING( 25000, FULL, 25000baseSR_Full ),
/* 20G */
- {
- .speed = SPEED_20000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT,
- },
- {
- .speed = SPEED_20000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT,
- },
+ PHY_SETTING( 20000, FULL, 20000baseKR2_Full ),
+ PHY_SETTING( 20000, FULL, 20000baseMLD2_Full ),
/* 10G */
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
- },
+ PHY_SETTING( 10000, FULL, 10000baseCR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseER_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseKR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseKX4_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseLR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseLRM_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseR_FEC ),
+ PHY_SETTING( 10000, FULL, 10000baseSR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseT_Full ),
/* 5G */
- {
- .speed = SPEED_5000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
- },
-
+ PHY_SETTING( 5000, FULL, 5000baseT_Full ),
/* 2.5G */
- {
- .speed = SPEED_2500,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
- },
- {
- .speed = SPEED_2500,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
- },
+ PHY_SETTING( 2500, FULL, 2500baseT_Full ),
+ PHY_SETTING( 2500, FULL, 2500baseX_Full ),
/* 1G */
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_HALF,
- .bit = ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
- },
+ PHY_SETTING( 1000, FULL, 1000baseKX_Full ),
+ PHY_SETTING( 1000, FULL, 1000baseT_Full ),
+ PHY_SETTING( 1000, HALF, 1000baseT_Half ),
+ PHY_SETTING( 1000, FULL, 1000baseX_Full ),
/* 100M */
- {
- .speed = SPEED_100,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
- },
- {
- .speed = SPEED_100,
- .duplex = DUPLEX_HALF,
- .bit = ETHTOOL_LINK_MODE_100baseT_Half_BIT,
- },
+ PHY_SETTING( 100, FULL, 100baseT_Full ),
+ PHY_SETTING( 100, HALF, 100baseT_Half ),
/* 10M */
- {
- .speed = SPEED_10,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10baseT_Full_BIT,
- },
- {
- .speed = SPEED_10,
- .duplex = DUPLEX_HALF,
- .bit = ETHTOOL_LINK_MODE_10baseT_Half_BIT,
- },
+ PHY_SETTING( 10, FULL, 10baseT_Full ),
+ PHY_SETTING( 10, HALF, 10baseT_Half ),
};
+#undef PHY_SETTING
/**
* phy_lookup_setting - lookup a PHY setting
{
const struct phy_setting *setting;
- /* Sanitize settings based on PHY capabilities */
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported))
- phydev->autoneg = AUTONEG_DISABLE;
-
setting = phy_find_valid(phydev->speed, phydev->duplex,
phydev->supported);
if (setting) {
}
static struct phy_driver genphy_driver;
-extern struct phy_driver genphy_10g_driver;
+extern struct phy_driver genphy_c45_driver;
static LIST_HEAD(phy_fixup_list);
static DEFINE_MUTEX(phy_fixup_lock);
*/
if (!d->driver) {
if (phydev->is_c45)
- d->driver = &genphy_10g_driver.mdiodrv.driver;
+ d->driver = &genphy_c45_driver.mdiodrv.driver;
else
d->driver = &genphy_driver.mdiodrv.driver;
bool phy_driver_is_genphy_10g(struct phy_device *phydev)
{
return phy_driver_is_genphy_kind(phydev,
- &genphy_10g_driver.mdiodrv.driver);
+ &genphy_c45_driver.mdiodrv.driver);
}
EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
*/
if (!phy_polling_mode(phydev)) {
status = phy_read(phydev, MII_BMSR);
- if (status < 0) {
+ if (status < 0)
return status;
- } else if (status & BMSR_LSTATUS) {
- phydev->link = 1;
- return 0;
- }
+ else if (status & BMSR_LSTATUS)
+ goto done;
}
/* Read link and autonegotiation status */
status = phy_read(phydev, MII_BMSR);
if (status < 0)
return status;
-
+done:
phydev->link = status & BMSR_LSTATUS ? 1 : 0;
phydev->autoneg_complete = status & BMSR_ANEGCOMPLETE ? 1 : 0;
linkmode_zero(phydev->lp_advertising);
if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- phydev->supported) ||
- linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- phydev->supported)) {
+ if (phydev->is_gigabit_capable) {
lpagb = phy_read(phydev, MII_STAT1000);
if (lpagb < 0)
return lpagb;
*/
int genphy_soft_reset(struct phy_device *phydev)
{
+ u16 res = BMCR_RESET;
int ret;
- ret = phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
+ if (phydev->autoneg == AUTONEG_ENABLE)
+ res |= BMCR_ANRESTART;
+
+ ret = phy_modify(phydev, MII_BMCR, BMCR_ISOLATE, res);
if (ret < 0)
return ret;
- return phy_poll_reset(phydev);
+ ret = phy_poll_reset(phydev);
+ if (ret)
+ return ret;
+
+ /* BMCR may be reset to defaults */
+ if (phydev->autoneg == AUTONEG_DISABLE)
+ ret = genphy_setup_forced(phydev);
+
+ return ret;
}
EXPORT_SYMBOL(genphy_soft_reset);
if (err)
goto out;
+ if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ phydev->supported))
+ phydev->autoneg = 0;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+ phydev->supported))
+ phydev->is_gigabit_capable = 1;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ phydev->supported))
+ phydev->is_gigabit_capable = 1;
+
of_set_phy_supported(phydev);
linkmode_copy(phydev->advertising, phydev->supported);
features_init();
- rc = phy_driver_register(&genphy_10g_driver, THIS_MODULE);
+ rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
if (rc)
- goto err_10g;
+ goto err_c45;
rc = phy_driver_register(&genphy_driver, THIS_MODULE);
if (rc) {
- phy_driver_unregister(&genphy_10g_driver);
-err_10g:
+ phy_driver_unregister(&genphy_c45_driver);
+err_c45:
mdio_bus_exit();
}
static void __exit phy_exit(void)
{
- phy_driver_unregister(&genphy_10g_driver);
+ phy_driver_unregister(&genphy_c45_driver);
phy_driver_unregister(&genphy_driver);
mdio_bus_exit();
}
.phy_id = 0x00181440,
.name = "QS6612",
.phy_id_mask = 0xfffffff0,
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = qs6612_config_init,
.ack_interrupt = qs6612_ack_interrupt,
.config_intr = qs6612_config_intr,
.phy_id = INTERNAL_EPHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "Rockchip integrated EPHY",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = 0,
.link_change_notify = rockchip_link_change_notify,
.soft_reset = genphy_soft_reset,
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN83C185",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = smsc_phy_probe,
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN8187",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = smsc_phy_probe,
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN8700",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = smsc_phy_probe,
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN911x Internal PHY",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = smsc_phy_probe,
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN8710/LAN8720",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = PHY_RST_AFTER_CLK_EN,
.probe = smsc_phy_probe,
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN8740",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.probe = smsc_phy_probe,
};
MODULE_DEVICE_TABLE(spi, ks8995_id);
+/* Device-tree compatible strings for the "spi-ks8995" driver; the empty
+ * entry terminates the table.
+ */
+static const struct of_device_id ks8895_spi_of_match[] = {
+ { .compatible = "micrel,ks8995" },
+ { .compatible = "micrel,ksz8864" },
+ { .compatible = "micrel,ksz8795" },
+ { },
+ };
+MODULE_DEVICE_TABLE(of, ks8895_spi_of_match);
+
static inline u8 get_chip_id(u8 val)
{
return (val >> ID1_CHIPID_S) & ID1_CHIPID_M;
static struct spi_driver ks8995_driver = {
.driver = {
.name = "spi-ks8995",
+ .of_match_table = of_match_ptr(ks8895_spi_of_match),
},
.probe = ks8995_probe,
.remove = ks8995_remove,
.phy_id = STE101P_PHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "STe101p",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = ste10Xp_config_init,
.ack_interrupt = ste10Xp_ack_interrupt,
.config_intr = ste10Xp_config_intr,
.phy_id = STE100P_PHY_ID,
.phy_id_mask = 0xffffffff,
.name = "STe100p",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.config_init = ste10Xp_config_init,
.ack_interrupt = ste10Xp_ack_interrupt,
.config_intr = ste10Xp_config_intr,
.phy_id = UPD60620_PHY_ID,
.phy_id_mask = 0xfffffffe,
.name = "Renesas uPD60620",
- .features = PHY_BASIC_FEATURES,
+ /* PHY_BASIC_FEATURES */
.flags = 0,
.config_init = upd60620_config_init,
.read_status = upd60620_read_status,
#define PHY_ID_VSC8234 0x000fc620
#define PHY_ID_VSC8244 0x000fc6c0
-#define PHY_ID_VSC8514 0x00070670
#define PHY_ID_VSC8572 0x000704d0
#define PHY_ID_VSC8601 0x00070420
#define PHY_ID_VSC7385 0x00070450
err = phy_write(phydev, MII_VSC8244_IMASK,
(phydev->drv->phy_id == PHY_ID_VSC8234 ||
phydev->drv->phy_id == PHY_ID_VSC8244 ||
- phydev->drv->phy_id == PHY_ID_VSC8514 ||
phydev->drv->phy_id == PHY_ID_VSC8572 ||
phydev->drv->phy_id == PHY_ID_VSC8601) ?
MII_VSC8244_IMASK_MASK :
.phy_id = PHY_ID_VSC8234,
.name = "Vitesse VSC8234",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
.ack_interrupt = &vsc824x_ack_interrupt,
.phy_id = PHY_ID_VSC8244,
.name = "Vitesse VSC8244",
.phy_id_mask = 0x000fffc0,
- .features = PHY_GBIT_FEATURES,
- .config_init = &vsc824x_config_init,
- .config_aneg = &vsc82x4_config_aneg,
- .ack_interrupt = &vsc824x_ack_interrupt,
- .config_intr = &vsc82xx_config_intr,
-}, {
- .phy_id = PHY_ID_VSC8514,
- .name = "Vitesse VSC8514",
- .phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
.ack_interrupt = &vsc824x_ack_interrupt,
.phy_id = PHY_ID_VSC8572,
.name = "Vitesse VSC8572",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
.ack_interrupt = &vsc824x_ack_interrupt,
.phy_id = PHY_ID_VSC8601,
.name = "Vitesse VSC8601",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc8601_config_init,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
.phy_id = PHY_ID_VSC7385,
.name = "Vitesse VSC7385",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = vsc738x_config_init,
.config_aneg = vsc73xx_config_aneg,
.read_page = vsc73xx_read_page,
.phy_id = PHY_ID_VSC7388,
.name = "Vitesse VSC7388",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = vsc738x_config_init,
.config_aneg = vsc73xx_config_aneg,
.read_page = vsc73xx_read_page,
.phy_id = PHY_ID_VSC7395,
.name = "Vitesse VSC7395",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = vsc739x_config_init,
.config_aneg = vsc73xx_config_aneg,
.read_page = vsc73xx_read_page,
.phy_id = PHY_ID_VSC7398,
.name = "Vitesse VSC7398",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = vsc739x_config_init,
.config_aneg = vsc73xx_config_aneg,
.read_page = vsc73xx_read_page,
.phy_id = PHY_ID_VSC8662,
.name = "Vitesse VSC8662",
.phy_id_mask = 0x000ffff0,
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
.ack_interrupt = &vsc824x_ack_interrupt,
.phy_id = PHY_ID_VSC8221,
.phy_id_mask = 0x000ffff0,
.name = "Vitesse VSC8221",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc8221_config_init,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
.phy_id = PHY_ID_VSC8211,
.phy_id_mask = 0x000ffff0,
.name = "Vitesse VSC8211",
- .features = PHY_GBIT_FEATURES,
+ /* PHY_GBIT_FEATURES */
.config_init = &vsc8221_config_init,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
static struct mdio_device_id __maybe_unused vitesse_tbl[] = {
{ PHY_ID_VSC8234, 0x000ffff0 },
{ PHY_ID_VSC8244, 0x000fffc0 },
- { PHY_ID_VSC8514, 0x000ffff0 },
{ PHY_ID_VSC8572, 0x000ffff0 },
{ PHY_ID_VSC7385, 0x000ffff0 },
{ PHY_ID_VSC7388, 0x000ffff0 },
card_send_command(const int ioaddr[], const char* name,
const unsigned char out[], unsigned char in[])
{
- int status, x;
+ int status;
if ((status = card_wait_for_busy_clear(ioaddr, name)))
return status;
out[0], out[1], out[2], out[3], out[4], out[5]);
}
- if (out[1] == 0x1b) {
- x = (out[2] == 0x02);
- } else {
+ if (out[1] != 0x1b) {
if (out[0] >= 0x80 && in[0] != (out[1] | 0x80))
return -EIO;
}
static const unsigned char Command0[6] = {0x80, 0x1f, 0x00, 0x00, 0x00, 0x00};
unsigned char st[7];
- int crc, status;
+ int status;
/* check CRC */
if ((status = card_send_command(ioaddr, name, Command0, st)))
return status;
if (st[1] != st[3] || st[2] != st[4])
return -EIO;
- crc = st[1] << 8 | st[2];
return 0;
}
return -EINVAL;
}
+ if (netdev_has_upper_dev(dev, port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface");
+ netdev_err(dev, "Device %s is already an upper device of the team interface\n",
+ portname);
+ return -EBUSY;
+ }
+
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
goto err_option_port_add;
}
+ /* set promiscuity level to new slave */
+ if (dev->flags & IFF_PROMISC) {
+ err = dev_set_promiscuity(port_dev, 1);
+ if (err)
+ goto err_set_slave_promisc;
+ }
+
+ /* set allmulti level to new slave */
+ if (dev->flags & IFF_ALLMULTI) {
+ err = dev_set_allmulti(port_dev, 1);
+ if (err) {
+ if (dev->flags & IFF_PROMISC)
+ dev_set_promiscuity(port_dev, -1);
+ goto err_set_slave_promisc;
+ }
+ }
+
netif_addr_lock_bh(dev);
dev_uc_sync_multiple(port_dev, dev);
dev_mc_sync_multiple(port_dev, dev);
return 0;
+err_set_slave_promisc:
+ __team_option_inst_del_port(team, port);
+
err_option_port_add:
team_upper_dev_unlink(team, port);
team_port_disable(team, port);
list_del_rcu(&port->list);
+
+ if (dev->flags & IFF_PROMISC)
+ dev_set_promiscuity(port_dev, -1);
+ if (dev->flags & IFF_ALLMULTI)
+ dev_set_allmulti(port_dev, -1);
+
team_upper_dev_unlink(team, port);
netdev_rx_handler_unregister(port_dev);
team_port_disable_netpoll(port);
#include <linux/workqueue.h>
#define USB_VENDOR_APPLE 0x05ac
-#define USB_PRODUCT_IPHONE 0x1290
-#define USB_PRODUCT_IPHONE_3G 0x1292
-#define USB_PRODUCT_IPHONE_3GS 0x1294
-#define USB_PRODUCT_IPHONE_4 0x1297
-#define USB_PRODUCT_IPAD 0x129a
-#define USB_PRODUCT_IPAD_2 0x12a2
-#define USB_PRODUCT_IPAD_3 0x12a6
-#define USB_PRODUCT_IPAD_MINI 0x12ab
-#define USB_PRODUCT_IPHONE_4_VZW 0x129c
-#define USB_PRODUCT_IPHONE_4S 0x12a0
-#define USB_PRODUCT_IPHONE_5 0x12a8
#define IPHETH_USBINTF_CLASS 255
#define IPHETH_USBINTF_SUBCLASS 253
#define IPHETH_CARRIER_ON 0x04
static const struct usb_device_id ipheth_table[] = {
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_3G,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_3GS,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPAD,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPAD_2,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPAD_3,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPAD_MINI,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4S,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
- { USB_DEVICE_AND_INTERFACE_INFO(
- USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_5,
- IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
- IPHETH_USBINTF_PROTO) },
+ { USB_VENDOR_AND_INTERFACE_INFO(USB_VENDOR_APPLE, IPHETH_USBINTF_CLASS,
+ IPHETH_USBINTF_SUBCLASS,
+ IPHETH_USBINTF_PROTO) },
{ }
};
MODULE_DEVICE_TABLE(usb, ipheth_table);
enum qmi_wwan_quirks {
QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */
+ QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. endpoints */
};
struct qmimux_hdr {
.data = QMI_WWAN_QUIRK_DTR,
};
+static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = {
+ .description = "WWAN/QMI device",
+ .flags = FLAG_WWAN | FLAG_SEND_ZLP,
+ .bind = qmi_wwan_bind,
+ .unbind = qmi_wwan_unbind,
+ .manage_power = qmi_wwan_manage_power,
+ .rx_fixup = qmi_wwan_rx_fixup,
+ .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG,
+};
+
#define HUAWEI_VENDOR_ID 0x12D1
/* map QMI/wwan function by a fixed interface number */
#define QMI_GOBI_DEVICE(vend, prod) \
QMI_FIXED_INTF(vend, prod, 0)
+/* Quectel does not use fixed interface numbers on at least some of their
+ * devices. We need to check the number of endpoints to ensure that we bind to
+ * the correct interface.
+ */
+#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \
+ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \
+ USB_SUBCLASS_VENDOR_SPEC, 0xff), \
+ .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg
+
static const struct usb_device_id products[] = {
/* 1. CDC ECM like devices match on the control interface */
{ /* Huawei E392, E398 and possibly others sharing both device id and more... */
USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7),
.driver_info = (unsigned long)&qmi_wwan_info,
},
- { /* Quectel EP06/EG06/EM06 */
- USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306,
- USB_CLASS_VENDOR_SPEC,
- USB_SUBCLASS_VENDOR_SPEC,
- 0xff),
- .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr,
- },
- { /* Quectel EG12/EM12 */
- USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0512,
- USB_CLASS_VENDOR_SPEC,
- USB_SUBCLASS_VENDOR_SPEC,
- 0xff),
- .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr,
- },
+ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
+ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */
+ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */
/* 3. Combined interface devices matching on interface number */
{QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */
- {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
return false;
}
-static bool quectel_diag_detected(struct usb_interface *intf)
-{
- struct usb_device *dev = interface_to_usbdev(intf);
- struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc;
- u16 id_vendor = le16_to_cpu(dev->descriptor.idVendor);
- u16 id_product = le16_to_cpu(dev->descriptor.idProduct);
-
- if (id_vendor != 0x2c7c || intf_desc.bNumEndpoints != 2)
- return false;
-
- if (id_product == 0x0306 || id_product == 0x0512)
- return true;
- else
- return false;
-}
-
static int qmi_wwan_probe(struct usb_interface *intf,
const struct usb_device_id *prod)
{
struct usb_device_id *id = (struct usb_device_id *)prod;
struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
+ const struct driver_info *info;
/* Workaround to enable dynamic IDs. This disables usbnet
* blacklisting functionality. Which, if required, can be
* we need to match on class/subclass/protocol. These values are
* identical for the diagnostic- and QMI-interface, but bNumEndpoints is
* different. Ignore the current interface if the number of endpoints
- * the number for the diag interface (two).
+ * equals the number for the diag interface (two).
*/
- if (quectel_diag_detected(intf))
- return -ENODEV;
+ /* driver_info stores the address of the matching struct driver_info
+ * as an unsigned long, so convert its value back to a pointer.
+ * Taking the address of the field itself would make info->data read
+ * memory that is not a struct driver_info.
+ */
+ info = (void *)id->driver_info;
+
+ if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
+ if (desc->bNumEndpoints == 2)
+ return -ENODEV;
+ }
return usbnet_probe(intf, id);
}
struct net_device *dev = tp->netdev;
int ret;
+ sa->sa_family = dev->type;
+
if (tp->version == RTL_VER_01) {
ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data);
} else {
}
}
-static int veth_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *info)
-{
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
-
- return 0;
-}
-
static const struct ethtool_ops veth_ethtool_ops = {
.get_drvinfo = veth_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_sset_count = veth_get_sset_count,
.get_ethtool_stats = veth_get_ethtool_stats,
.get_link_ksettings = veth_get_link_ksettings,
- .get_ts_info = veth_get_ts_info,
+ .get_ts_info = ethtool_op_get_ts_info,
};
/* general routines */
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
- u32 nexthop;
+ bool is_v6gw = false;
int ret = -EINVAL;
nf_reset(skb);
rcu_read_lock_bh();
- nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
- neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ /* if crossing protocols, can not use the cached header */
+ ret = neigh_output(neigh, skb, is_v6gw);
rcu_read_unlock_bh();
return ret;
}
.ndo_init = vrf_dev_init,
.ndo_uninit = vrf_dev_uninit,
.ndo_start_xmit = vrf_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
.ndo_get_stats64 = vrf_get_stats64,
.ndo_add_slave = vrf_add_slave,
.ndo_del_slave = vrf_del_slave,
/* default to no qdisc; user can add if desired */
dev->priv_flags |= IFF_NO_QUEUE;
dev->priv_flags |= IFF_NO_RX_HANDLER;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- dev->min_mtu = 0;
- dev->max_mtu = 0;
+ /* VRF devices do not care about MTU, but if the MTU is set
+ * too low then the ipv4 and ipv6 protocols are disabled
+ * which breaks networking.
+ */
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = ETH_MAX_MTU;
}
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
case I2400M_SS_IDLE:
d_printf(1, dev, "entering BS-negotiated idle mode\n");
+ /* Fall through */
case I2400M_SS_DISCONNECTING:
case I2400M_SS_DATA_PATH_CONNECTED:
wimax_state_change(wimax_dev, WIMAX_ST_CONNECTED);
num_msdus++;
num_bytes += ret;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, false);
ieee80211_txq_schedule_end(hw, txq->ac);
record->num_msdus = cpu_to_le16(num_msdus);
if (ret < 0)
break;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, false);
ath10k_htt_tx_txq_update(hw, txq);
if (ret == -EBUSY)
break;
if (ret < 0)
break;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, false);
ath10k_htt_tx_txq_update(hw, txq);
out:
ieee80211_txq_schedule_end(hw, ac);
goto out;
while ((queue = ieee80211_next_txq(hw, txq->mac80211_qnum))) {
+ bool force;
+
tid = (struct ath_atx_tid *)queue->drv_priv;
ret = ath_tx_sched_aggr(sc, txq, tid);
ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret);
- ieee80211_return_txq(hw, queue);
+ force = !skb_queue_empty(&tid->retry_q);
+ ieee80211_return_txq(hw, queue, force);
}
out:
static void lpphy_papd_cal_txpwr(struct b43_wldev *dev)
{
struct b43_phy_lp *lpphy = dev->phy.lp;
- struct lpphy_tx_gains gains, oldgains;
+ struct lpphy_tx_gains oldgains;
int old_txpctl, old_afe_ovr, old_rf, old_bbmult;
lpphy_read_tx_pctl_mode_from_hardware(dev);
lpphy_set_tx_power_control(dev, B43_LPPHY_TXPCTL_OFF);
if (dev->dev->chip_id == 0x4325 && dev->dev->chip_rev == 0)
- lpphy_papd_cal(dev, gains, 0, 1, 30);
+ lpphy_papd_cal(dev, oldgains, 0, 1, 30);
else
- lpphy_papd_cal(dev, gains, 0, 1, 65);
+ lpphy_papd_cal(dev, oldgains, 0, 1, 65);
if (old_afe_ovr)
lpphy_set_tx_gains(dev, oldgains);
return -ENOMEM;
}
-void brcmf_proto_bcdc_detach(struct brcmf_pub *drvr)
+void brcmf_proto_bcdc_detach_pre_delif(struct brcmf_pub *drvr)
+{
+ struct brcmf_bcdc *bcdc = drvr->proto->pd;
+
+ brcmf_fws_detach_pre_delif(bcdc->fws);
+}
+
+void brcmf_proto_bcdc_detach_post_delif(struct brcmf_pub *drvr)
{
struct brcmf_bcdc *bcdc = drvr->proto->pd;
drvr->proto->pd = NULL;
- brcmf_fws_detach(bcdc->fws);
+ brcmf_fws_detach_post_delif(bcdc->fws);
kfree(bcdc);
}
#ifdef CONFIG_BRCMFMAC_PROTO_BCDC
int brcmf_proto_bcdc_attach(struct brcmf_pub *drvr);
-void brcmf_proto_bcdc_detach(struct brcmf_pub *drvr);
+void brcmf_proto_bcdc_detach_pre_delif(struct brcmf_pub *drvr);
+void brcmf_proto_bcdc_detach_post_delif(struct brcmf_pub *drvr);
void brcmf_proto_bcdc_txflowblock(struct device *dev, bool state);
void brcmf_proto_bcdc_txcomplete(struct device *dev, struct sk_buff *txp,
bool success);
struct brcmf_fws_info *drvr_to_fws(struct brcmf_pub *drvr);
#else
static inline int brcmf_proto_bcdc_attach(struct brcmf_pub *drvr) { return 0; }
-static inline void brcmf_proto_bcdc_detach(struct brcmf_pub *drvr) {}
+/* No-op stub used when CONFIG_BRCMFMAC_PROTO_BCDC is not set. */
+static inline void brcmf_proto_bcdc_detach_pre_delif(struct brcmf_pub *drvr) {}
+static inline void brcmf_proto_bcdc_detach_post_delif(struct brcmf_pub *drvr) {}
#endif
#endif /* BRCMFMAC_BCDC_H */
err = brcmf_sdiod_set_backplane_window(sdiodev, addr);
if (err)
- return err;
+ goto out;
addr &= SBSDIO_SB_OFT_ADDR_MASK;
addr |= SBSDIO_SB_ACCESS_2_4B_FLAG;
- if (!err)
- err = brcmf_sdiod_skbuff_write(sdiodev, sdiodev->func2, addr,
- mypkt);
-
+ err = brcmf_sdiod_skbuff_write(sdiodev, sdiodev->func2, addr, mypkt);
+out:
brcmu_pkt_buf_free_skb(mypkt);
return err;
int (*get_fwname)(struct device *dev, const char *ext,
unsigned char *fw_name);
void (*debugfs_create)(struct device *dev);
+ int (*reset)(struct device *dev);
};
return bus->ops->debugfs_create(bus->dev);
}
+static inline
+int brcmf_bus_reset(struct brcmf_bus *bus)
+{
+ if (!bus->ops->reset)
+ return -EOPNOTSUPP;
+
+ return bus->ops->reset(bus->dev);
+}
+
/*
* interface functions from common layer
*/
void brcmf_dev_reset(struct device *dev);
/* Request from bus module to initiate a coredump */
void brcmf_dev_coredump(struct device *dev);
+/* Indication that firmware has halted or crashed */
+void brcmf_fw_crashed(struct device *dev);
/* Configure the "global" bus state used by upper layers */
void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state);
conn_info->req_ie =
kmemdup(cfg->extra_buf, conn_info->req_ie_len,
GFP_KERNEL);
+ if (!conn_info->req_ie)
+ conn_info->req_ie_len = 0;
} else {
conn_info->req_ie_len = 0;
conn_info->req_ie = NULL;
conn_info->resp_ie =
kmemdup(cfg->extra_buf, conn_info->resp_ie_len,
GFP_KERNEL);
+ if (!conn_info->resp_ie)
+ conn_info->resp_ie_len = 0;
} else {
conn_info->resp_ie_len = 0;
conn_info->resp_ie = NULL;
bool rtnl_locked)
{
struct brcmf_if *ifp;
+ int ifidx;
ifp = drvr->iflist[bsscfgidx];
- drvr->iflist[bsscfgidx] = NULL;
if (!ifp) {
bphy_err(drvr, "Null interface, bsscfgidx=%d\n", bsscfgidx);
return;
}
brcmf_dbg(TRACE, "Enter, bsscfgidx=%d, ifidx=%d\n", bsscfgidx,
ifp->ifidx);
- if (drvr->if2bss[ifp->ifidx] == bsscfgidx)
- drvr->if2bss[ifp->ifidx] = BRCMF_BSSIDX_INVALID;
+ ifidx = ifp->ifidx;
+
if (ifp->ndev) {
if (bsscfgidx == 0) {
if (ifp->ndev->netdev_ops == &brcmf_netdev_ops_pri) {
brcmf_p2p_ifp_removed(ifp, rtnl_locked);
kfree(ifp);
}
+
+ drvr->iflist[bsscfgidx] = NULL;
+ if (drvr->if2bss[ifidx] == bsscfgidx)
+ drvr->if2bss[ifidx] = BRCMF_BSSIDX_INVALID;
}
void brcmf_remove_interface(struct brcmf_if *ifp, bool rtnl_locked)
return 0;
}
+static void brcmf_core_bus_reset(struct work_struct *work)
+{
+ struct brcmf_pub *drvr = container_of(work, struct brcmf_pub,
+ bus_reset);
+
+ brcmf_bus_reset(drvr->bus_if);
+}
+
static int brcmf_bus_started(struct brcmf_pub *drvr, struct cfg80211_ops *ops)
{
int ret = -1;
#endif
#endif /* CONFIG_INET */
+ INIT_WORK(&drvr->bus_reset, brcmf_core_bus_reset);
+
/* populate debugfs */
brcmf_debugfs_add_entry(drvr, "revinfo", brcmf_revinfo_read);
brcmf_feat_debugfs_create(drvr);
brcmf_dbg(TRACE, "failed to create coredump\n");
}
+void brcmf_fw_crashed(struct device *dev)
+{
+ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+ struct brcmf_pub *drvr = bus_if->drvr;
+
+ bphy_err(drvr, "Firmware has halted or crashed\n");
+
+ brcmf_dev_coredump(dev);
+
+ schedule_work(&drvr->bus_reset);
+}
+
void brcmf_detach(struct device *dev)
{
s32 i;
brcmf_bus_change_state(bus_if, BRCMF_BUS_DOWN);
+ brcmf_proto_detach_pre_delif(drvr);
+
/* make sure primary interface removed last */
for (i = BRCMF_MAX_IFS-1; i > -1; i--)
brcmf_remove_interface(drvr->iflist[i], false);
brcmf_bus_stop(drvr->bus_if);
- brcmf_proto_detach(drvr);
+ brcmf_proto_detach_post_delif(drvr);
bus_if->drvr = NULL;
wiphy_free(drvr->wiphy);
struct notifier_block inet6addr_notifier;
struct brcmf_mp_device *settings;
+ struct work_struct bus_reset;
+
u8 clmver[BRCMF_DCMD_SMLEN];
};
size_t mp_path_len;
u32 i, j;
char end = '\0';
- size_t reqsz;
for (i = 0; i < table_size; i++) {
if (mapping_table[i].chipid == chip &&
return NULL;
}
- reqsz = sizeof(*fwreq) + n_fwnames * sizeof(struct brcmf_fw_item);
- fwreq = kzalloc(reqsz, GFP_KERNEL);
+ fwreq = kzalloc(struct_size(fwreq, items, n_fwnames), GFP_KERNEL);
if (!fwreq)
return NULL;
for (j = 0; j < n_fwnames; j++) {
fwreq->items[j].path = fwnames[j].path;
+ fwnames[j].path[0] = '\0';
/* check if firmware path is provided by module parameter */
if (brcmf_mp_global.firmware_path[0] != '\0') {
strlcpy(fwnames[j].path, mp_path,
return ifidx == *(int *)arg;
}
-static void brcmf_fws_psq_flush(struct brcmf_fws_info *fws, struct pktq *q,
- int ifidx)
-{
- bool (*matchfn)(struct sk_buff *, void *) = NULL;
- struct sk_buff *skb;
- int prec;
-
- if (ifidx != -1)
- matchfn = brcmf_fws_ifidx_match;
- for (prec = 0; prec < q->num_prec; prec++) {
- skb = brcmu_pktq_pdeq_match(q, prec, matchfn, &ifidx);
- while (skb) {
- brcmu_pkt_buf_free_skb(skb);
- skb = brcmu_pktq_pdeq_match(q, prec, matchfn, &ifidx);
- }
- }
-}
-
static void brcmf_fws_hanger_init(struct brcmf_fws_hanger *hanger)
{
int i;
return 0;
}
+static void brcmf_fws_psq_flush(struct brcmf_fws_info *fws, struct pktq *q,
+ int ifidx)
+{
+ bool (*matchfn)(struct sk_buff *, void *) = NULL;
+ struct sk_buff *skb;
+ int prec;
+ u32 hslot;
+
+ if (ifidx != -1)
+ matchfn = brcmf_fws_ifidx_match;
+ for (prec = 0; prec < q->num_prec; prec++) {
+ skb = brcmu_pktq_pdeq_match(q, prec, matchfn, &ifidx);
+ while (skb) {
+ hslot = brcmf_skb_htod_tag_get_field(skb, HSLOT);
+ brcmf_fws_hanger_poppkt(&fws->hanger, hslot, &skb,
+ true);
+ brcmu_pkt_buf_free_skb(skb);
+ skb = brcmu_pktq_pdeq_match(q, prec, matchfn, &ifidx);
+ }
+ }
+}
+
static int brcmf_fws_hanger_mark_suppressed(struct brcmf_fws_hanger *h,
u32 slot_id)
{
brcmf_fws_lock(fws);
ifp->fws_desc = NULL;
brcmf_dbg(TRACE, "deleting %s\n", entry->name);
+ brcmf_fws_macdesc_cleanup(fws, &fws->desc.iface[ifp->ifidx],
+ ifp->ifidx);
brcmf_fws_macdesc_deinit(entry);
brcmf_fws_cleanup(fws, ifp->ifidx);
brcmf_fws_unlock(fws);
return fws;
fail:
- brcmf_fws_detach(fws);
+ brcmf_fws_detach_pre_delif(fws);
+ brcmf_fws_detach_post_delif(fws);
return ERR_PTR(rc);
}
-void brcmf_fws_detach(struct brcmf_fws_info *fws)
+void brcmf_fws_detach_pre_delif(struct brcmf_fws_info *fws)
{
if (!fws)
return;
-
- if (fws->fws_wq)
+ if (fws->fws_wq) {
destroy_workqueue(fws->fws_wq);
+ fws->fws_wq = NULL;
+ }
+}
+
+void brcmf_fws_detach_post_delif(struct brcmf_fws_info *fws)
+{
+ if (!fws)
+ return;
/* cleanup */
brcmf_fws_lock(fws);
#define FWSIGNAL_H_
struct brcmf_fws_info *brcmf_fws_attach(struct brcmf_pub *drvr);
-void brcmf_fws_detach(struct brcmf_fws_info *fws);
+void brcmf_fws_detach_pre_delif(struct brcmf_fws_info *fws);
+void brcmf_fws_detach_post_delif(struct brcmf_fws_info *fws);
void brcmf_fws_debugfs_create(struct brcmf_pub *drvr);
bool brcmf_fws_queue_skbs(struct brcmf_fws_info *fws);
bool brcmf_fws_fc_active(struct brcmf_fws_info *fws);
BRCMF_D2H_MSGRING_RX_COMPLETE_ITEMSIZE
};
+static void brcmf_pcie_setup(struct device *dev, int ret,
+ struct brcmf_fw_request *fwreq);
+static struct brcmf_fw_request *
+brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo);
static u32
brcmf_pcie_read_reg32(struct brcmf_pciedev_info *devinfo, u32 reg_offset)
}
if (dtoh_mb_data & BRCMF_D2H_DEV_FWHALT) {
brcmf_dbg(PCIE, "D2H_MB_DATA: FW HALT\n");
- brcmf_dev_coredump(&devinfo->pdev->dev);
+ brcmf_fw_crashed(&devinfo->pdev->dev);
}
}
return 0;
}
+/*
+ * Bus reset handler (installed as the .reset bus op).
+ *
+ * Detaches the common driver layer, releases the IRQ, scratch buffers and
+ * ring buffers, resets the device, then prepares a new firmware request and
+ * passes it to brcmf_fw_get_firmwares() together with brcmf_pcie_setup().
+ *
+ * Returns -ENOMEM if the firmware request cannot be prepared, otherwise the
+ * value returned by brcmf_fw_get_firmwares().
+ */
+static int brcmf_pcie_reset(struct device *dev)
+{
+ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+ struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie;
+ struct brcmf_pciedev_info *devinfo = buspub->devinfo;
+ struct brcmf_fw_request *fwreq;
+ int err;
+
+ brcmf_detach(dev);
+
+ brcmf_pcie_release_irq(devinfo);
+ brcmf_pcie_release_scratchbuffers(devinfo);
+ brcmf_pcie_release_ringbuffers(devinfo);
+ brcmf_pcie_reset_device(devinfo);
+
+ fwreq = brcmf_pcie_prepare_fw_request(devinfo);
+ if (!fwreq) {
+ dev_err(dev, "Failed to prepare FW request\n");
+ return -ENOMEM;
+ }
+
+ err = brcmf_fw_get_firmwares(dev, fwreq, brcmf_pcie_setup);
+ if (err) {
+ /* the request was built fine; it is the firmware load that failed */
+ dev_err(dev, "Failed to get firmwares: %d\n", err);
+ kfree(fwreq);
+ }
+
+ return err;
+}
+
static const struct brcmf_bus_ops brcmf_pcie_bus_ops = {
.txdata = brcmf_pcie_tx,
.stop = brcmf_pcie_down,
.get_ramsize = brcmf_pcie_get_ramsize,
.get_memdump = brcmf_pcie_get_memdump,
.get_fwname = brcmf_pcie_get_fwname,
+ .reset = brcmf_pcie_reset,
};
return -ENOMEM;
}
-void brcmf_proto_detach(struct brcmf_pub *drvr)
+void brcmf_proto_detach_post_delif(struct brcmf_pub *drvr)
{
brcmf_dbg(TRACE, "Enter\n");
if (drvr->proto) {
if (drvr->bus_if->proto_type == BRCMF_PROTO_BCDC)
- brcmf_proto_bcdc_detach(drvr);
+ brcmf_proto_bcdc_detach_post_delif(drvr);
else if (drvr->bus_if->proto_type == BRCMF_PROTO_MSGBUF)
brcmf_proto_msgbuf_detach(drvr);
kfree(drvr->proto);
drvr->proto = NULL;
}
}
+
+void brcmf_proto_detach_pre_delif(struct brcmf_pub *drvr)
+{
+ if (drvr->proto && drvr->bus_if->proto_type == BRCMF_PROTO_BCDC)
+ brcmf_proto_bcdc_detach_pre_delif(drvr);
+}
int brcmf_proto_attach(struct brcmf_pub *drvr);
-void brcmf_proto_detach(struct brcmf_pub *drvr);
+void brcmf_proto_detach_pre_delif(struct brcmf_pub *drvr);
+void brcmf_proto_detach_post_delif(struct brcmf_pub *drvr);
static inline int brcmf_proto_hdrpull(struct brcmf_pub *drvr, bool do_fws,
struct sk_buff *skb,
/* Note the names are not postfixed with a1 for backward compatibility */
BRCMF_FW_DEF(43430A1, "brcmfmac43430-sdio");
BRCMF_FW_DEF(43455, "brcmfmac43455-sdio");
+BRCMF_FW_DEF(43456, "brcmfmac43456-sdio");
BRCMF_FW_DEF(4354, "brcmfmac4354-sdio");
BRCMF_FW_DEF(4356, "brcmfmac4356-sdio");
BRCMF_FW_DEF(4373, "brcmfmac4373-sdio");
BRCMF_FW_ENTRY(BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, 4339),
BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000001, 43430A0),
BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFE, 43430A1),
- BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFFC0, 43455),
+ BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0x00000200, 43456),
+ BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFDC0, 43455),
BRCMF_FW_ENTRY(BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, 4354),
BRCMF_FW_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
BRCMF_FW_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373),
/* dongle indicates the firmware has halted/crashed */
if (hmb_data & HMB_DATA_FWHALT) {
- brcmf_err("mailbox indicates firmware halted\n");
- brcmf_dev_coredump(&sdiod->func1->dev);
+ brcmf_dbg(SDIO, "mailbox indicates firmware halted\n");
+ brcmf_fw_crashed(&sdiod->func1->dev);
}
/* Dongle recomposed rx frames, accept them again */
struct usb_device *usbdev;
struct device *dev;
- struct mutex dev_init_lock;
+ struct completion dev_init_done;
int ctl_in_pipe, ctl_out_pipe;
struct urb *ctl_urb; /* URB for control endpoint */
}
-static void brcmf_usb_free_q(struct list_head *q, bool pending)
+static void brcmf_usb_free_q(struct list_head *q)
{
struct brcmf_usbreq *req, *next;
- int i = 0;
+
list_for_each_entry_safe(req, next, q, list) {
if (!req->urb) {
brcmf_err("bad req\n");
break;
}
- i++;
- if (pending) {
- usb_kill_urb(req->urb);
- } else {
- usb_free_urb(req->urb);
- list_del_init(&req->list);
- }
+ usb_free_urb(req->urb);
+ list_del_init(&req->list);
}
}
static void brcmf_cancel_all_urbs(struct brcmf_usbdev_info *devinfo)
{
+ int i;
+
if (devinfo->ctl_urb)
usb_kill_urb(devinfo->ctl_urb);
if (devinfo->bulk_urb)
usb_kill_urb(devinfo->bulk_urb);
- brcmf_usb_free_q(&devinfo->tx_postq, true);
- brcmf_usb_free_q(&devinfo->rx_postq, true);
+ if (devinfo->tx_reqs)
+ for (i = 0; i < devinfo->bus_pub.ntxq; i++)
+ usb_kill_urb(devinfo->tx_reqs[i].urb);
+ if (devinfo->rx_reqs)
+ for (i = 0; i < devinfo->bus_pub.nrxq; i++)
+ usb_kill_urb(devinfo->rx_reqs[i].urb);
}
static void brcmf_usb_down(struct device *dev)
brcmf_dbg(USB, "Enter, devinfo %p\n", devinfo);
/* free the URBS */
- brcmf_usb_free_q(&devinfo->rx_freeq, false);
- brcmf_usb_free_q(&devinfo->tx_freeq, false);
+ brcmf_usb_free_q(&devinfo->rx_freeq);
+ brcmf_usb_free_q(&devinfo->tx_freeq);
usb_free_urb(devinfo->ctl_urb);
usb_free_urb(devinfo->bulk_urb);
if (ret)
goto error;
- mutex_unlock(&devinfo->dev_init_lock);
+ complete(&devinfo->dev_init_done);
return;
error:
brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), ret);
- mutex_unlock(&devinfo->dev_init_lock);
+ complete(&devinfo->dev_init_done);
device_release_driver(dev);
}
if (ret)
goto fail;
/* we are done */
- mutex_unlock(&devinfo->dev_init_lock);
+ complete(&devinfo->dev_init_done);
return 0;
}
bus->chip = bus_pub->devid;
devinfo->usbdev = usb;
devinfo->dev = &usb->dev;
- /* Take an init lock, to protect for disconnect while still loading.
+ /* Init completion, to protect for disconnect while still loading.
* Necessary because of the asynchronous firmware load construction
*/
- mutex_init(&devinfo->dev_init_lock);
- mutex_lock(&devinfo->dev_init_lock);
+ init_completion(&devinfo->dev_init_done);
usb_set_intfdata(intf, devinfo);
return 0;
fail:
- mutex_unlock(&devinfo->dev_init_lock);
+ complete(&devinfo->dev_init_done);
kfree(devinfo);
usb_set_intfdata(intf, NULL);
return ret;
devinfo = (struct brcmf_usbdev_info *)usb_get_intfdata(intf);
if (devinfo) {
- mutex_lock(&devinfo->dev_init_lock);
+ wait_for_completion(&devinfo->dev_init_done);
/* Make sure that devinfo still exists. Firmware probe routines
* may have released the device and cleared the intfdata.
*/
sign = -sign;
denom = -denom;
}
- *res = 1;
*res = ((num * 2 + denom) / (denom * 2)) * sign;
return 1;
#define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-"
#define IWL_22000_SU_Z0_FW_PRE "iwlwifi-su-z0-"
#define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-"
+#define IWL_QUZ_A_HR_B_FW_PRE "iwlwifi-QuZ-a0-hr-b0-"
#define IWL_QNJ_B_JF_B_FW_PRE "iwlwifi-QuQnj-b0-jf-b0-"
#define IWL_CC_A_FW_PRE "iwlwifi-cc-a0-"
#define IWL_22000_SO_A_JF_B_FW_PRE "iwlwifi-so-a0-jf-b0-"
#define IWL_22000_SO_A_HR_B_FW_PRE "iwlwifi-so-a0-hr-b0-"
#define IWL_22000_SO_A_GF_A_FW_PRE "iwlwifi-so-a0-gf-a0-"
#define IWL_22000_TY_A_GF_A_FW_PRE "iwlwifi-ty-a0-gf-a0-"
+#define IWL_22000_SO_A_GF4_A_FW_PRE "iwlwifi-so-a0-gf4-a0-"
#define IWL_22000_HR_MODULE_FIRMWARE(api) \
IWL_22000_HR_FW_PRE __stringify(api) ".ucode"
IWL_22000_HR_A0_FW_PRE __stringify(api) ".ucode"
#define IWL_22000_SU_Z0_MODULE_FIRMWARE(api) \
IWL_22000_SU_Z0_FW_PRE __stringify(api) ".ucode"
-#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \
- IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \
+ IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode"
#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \
IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode"
#define IWL_QNJ_B_JF_B_MODULE_FIRMWARE(api) \
.dbgc_supported = true, \
.min_umac_error_event_table = 0x400000, \
.d3_debug_data_base_addr = 0x401000, \
- .d3_debug_data_length = 60 * 1024
+ .d3_debug_data_length = 60 * 1024, \
+ .fw_mon_smem_write_ptr_addr = 0xa0c16c, \
+ .fw_mon_smem_write_ptr_msk = 0xfffff, \
+ .fw_mon_smem_cycle_cnt_ptr_addr = 0xa0c174, \
+ .fw_mon_smem_cycle_cnt_ptr_msk = 0xfffff
#define IWL_DEVICE_AX200_COMMON \
IWL_DEVICE_22000_COMMON, \
IWL_DEVICE_22000_COMMON, \
.device_family = IWL_DEVICE_FAMILY_22000, \
.base_params = &iwl_22000_base_params, \
- .csr = &iwl_csr_v1
+ .csr = &iwl_csr_v1, \
+ .gp2_reg_addr = 0xa02c68
#define IWL_DEVICE_22560 \
IWL_DEVICE_22000_COMMON, \
.device_family = IWL_DEVICE_FAMILY_AX210, \
.base_params = &iwl_22000_base_params, \
.csr = &iwl_csr_v1, \
- .min_txq_size = 128
+ .min_txq_size = 128, \
+ .gp2_reg_addr = 0xd02c68, \
+ .min_256_ba_txq_size = 512
const struct iwl_cfg iwl22000_2ac_cfg_hr = {
.name = "Intel(R) Dual Band Wireless AC 22000",
.max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
};
-const struct iwl_cfg iwl22260_2ax_cfg = {
- .name = "Intel(R) Wireless-AX 22260",
+const struct iwl_cfg iwl_ax101_cfg_quz_hr = {
+ .name = "Intel(R) Wi-Fi 6 AX101",
+ .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE,
+ IWL_DEVICE_22500,
+ /*
+ * This device doesn't support receiving BlockAck with a large bitmap
+ * so we need to restrict the size of transmitted aggregation to the
+ * HT size; mac80211 would otherwise pick the HE max (256) by default.
+ */
+ .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
+};
+
+const struct iwl_cfg iwl_ax200_cfg_cc = {
+ .name = "Intel(R) Wi-Fi 6 AX200 160MHz",
.fw_name_pre = IWL_CC_A_FW_PRE,
IWL_DEVICE_22500,
/*
};
const struct iwl_cfg killer1650x_2ax_cfg = {
- .name = "Killer(R) Wireless-AX 1650x Wireless Network Adapter (200NGW)",
+ .name = "Killer(R) Wi-Fi 6 AX1650x 160MHz Wireless Network Adapter (200NGW)",
.fw_name_pre = IWL_CC_A_FW_PRE,
IWL_DEVICE_22500,
/*
};
const struct iwl_cfg killer1650w_2ax_cfg = {
- .name = "Killer(R) Wireless-AX 1650w Wireless Network Adapter (200D2W)",
+ .name = "Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)",
.fw_name_pre = IWL_CC_A_FW_PRE,
IWL_DEVICE_22500,
/*
};
const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = {
- .name = "Killer(R) Wireless-AX 1650i Wireless Network Adapter (22560NGW)",
+ .name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)",
.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
IWL_DEVICE_22500,
/*
};
const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = {
- .name = "Killer(R) Wireless-AX 1650s Wireless Network Adapter (22560D2W)",
+ .name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)",
.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
IWL_DEVICE_22500,
/*
const struct iwl_cfg iwlax210_2ax_cfg_so_gf_a0 = {
.name = "Intel(R) Wi-Fi 7 AX211 160MHz",
.fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE,
+ .uhb_supported = true,
IWL_DEVICE_AX210,
};
const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
.name = "Intel(R) Wi-Fi 7 AX210 160MHz",
.fw_name_pre = IWL_22000_TY_A_GF_A_FW_PRE,
+ .uhb_supported = true,
+ IWL_DEVICE_AX210,
+};
+
+const struct iwl_cfg iwlax210_2ax_cfg_so_gf4_a0 = {
+ .name = "Intel(R) Wi-Fi 7 AX210 160MHz",
+ .fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE,
IWL_DEVICE_AX210,
};
MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_22000_SU_Z0_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_QNJ_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* BSD LICENSE
*
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
.d3_debug_data_length = 92 * 1024, \
.ht_params = &iwl9000_ht_params, \
.nvm_ver = IWL9000_NVM_VERSION, \
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, \
+ .fw_mon_smem_write_ptr_addr = 0xa0476c, \
+ .fw_mon_smem_write_ptr_msk = 0xfffff, \
+ .fw_mon_smem_cycle_cnt_ptr_addr = 0xa04774, \
+ .fw_mon_smem_cycle_cnt_ptr_msk = 0xfffff
const struct iwl_cfg iwl9160_2ac_cfg = {
#include <linux/bitops.h>
-/*
+/**
* struct iwl_fw_ini_header: Common Header for all debug group TLV's structures
+ *
* @tlv_version: version info
* @apply_point: &enum iwl_fw_ini_apply_point
* @data: TLV data followed
- **/
+ */
struct iwl_fw_ini_header {
__le32 tlv_version;
__le32 apply_point;
} __packed; /* FW_DEBUG_TLV_HEADER_S */
/**
- * struct iwl_fw_ini_allocation_tlv - (IWL_FW_INI_TLV_TYPE_BUFFER_ALLOCATION)
+ * struct iwl_fw_ini_allocation_tlv - (IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION)
* buffer allocation TLV - for debug
*
* @iwl_fw_ini_header: header
* @max_fragments: the maximum allowed fragmentation in the desired memory
* allocation above
* @min_frag_size: the minimum allowed fragmentation size in bytes
-*/
+ */
struct iwl_fw_ini_allocation_tlv {
struct iwl_fw_ini_header header;
__le32 allocation_id;
} __packed; /* FW_DEBUG_TLV_BUFFER_ALLOCATION_TLV_S_VER_1 */
/**
- * struct iwl_fw_ini_hcmd (IWL_FW_INI_TLV_TYPE_HCMD)
- * Generic Host command pass through TLV
+ * enum iwl_fw_ini_dbg_domain - debug domains
+ * allows sending a host cmd or collecting a memory region only if a given
+ * domain is enabled
+ *
+ * @IWL_FW_INI_DBG_DOMAIN_ALWAYS_ON: the default domain, always on
+ * @IWL_FW_INI_DBG_DOMAIN_REPORT_PS: power save domain
+ */
+enum iwl_fw_ini_dbg_domain {
+ IWL_FW_INI_DBG_DOMAIN_ALWAYS_ON = 0,
+ IWL_FW_INI_DBG_DOMAIN_REPORT_PS,
+}; /* FW_DEBUG_TLV_DOMAIN_API_E_VER_1 */
+
+/**
+ * struct iwl_fw_ini_hcmd
*
* @id: the debug configuration command type for instance: 0xf6 / 0xf5 / DHC
* @group: the desired cmd group
- * @padding: all zeros for dword alignment
- * @data: all of the relevant command (0xf6/0xf5) to be sent
-*/
+ * @reserved: to align to FW struct
+ * @data: all of the relevant command data to be sent
+ */
struct iwl_fw_ini_hcmd {
u8 id;
u8 group;
- __le16 padding;
+ __le16 reserved;
u8 data[0];
-} __packed; /* FW_DEBUG_TLV_HCMD_DATA_S */
+} __packed; /* FW_DEBUG_TLV_HCMD_DATA_API_S_VER_1 */
/**
- * struct iwl_fw_ini_hcmd_tlv
+ * struct iwl_fw_ini_hcmd_tlv - (IWL_UCODE_TLV_TYPE_HCMD)
+ * Generic Host command pass through TLV
+ *
* @header: header
+ * @domain: send command only if the specific domain is enabled
+ * &enum iwl_fw_ini_dbg_domain
+ * @period_msec: period in which the hcmd will be sent to FW. Measured in msec
+ * (0 = one time command).
* @hcmd: a variable length host-command to be sent to apply the configuration.
*/
struct iwl_fw_ini_hcmd_tlv {
struct iwl_fw_ini_header header;
+ __le32 domain;
+ __le32 period_msec;
struct iwl_fw_ini_hcmd hcmd;
-} __packed; /* FW_DEBUG_TLV_HCMD_S_VER_1 */
+} __packed; /* FW_DEBUG_TLV_HCMD_API_S_VER_1 */
-/*
- * struct iwl_fw_ini_debug_flow_tlv (IWL_FW_INI_TLV_TYPE_DEBUG_FLOW)
+/**
+ * struct iwl_fw_ini_debug_flow_tlv - (IWL_UCODE_TLV_TYPE_DEBUG_FLOW)
*
* @header: header
* @debug_flow_cfg: &enum iwl_fw_ini_debug_flow
#define IWL_FW_INI_MAX_REGION_ID 64
#define IWL_FW_INI_MAX_NAME 32
+/**
+ * struct iwl_fw_ini_region_cfg_dhc - defines dhc response to dump.
+ *
+ * @id_and_grp: id and group of dhc response.
+ * @desc: dhc response descriptor.
+ */
+struct iwl_fw_ini_region_cfg_dhc {
+ __le32 id_and_grp;
+ __le32 desc;
+} __packed; /* FW_DEBUG_TLV_REGION_DHC_API_S_VER_1 */
+
/**
* struct iwl_fw_ini_region_cfg_internal - meta data of internal memory region
+ *
* @num_of_range: the amount of ranges in the region
* @range_data_size: size of the data to read per range, in bytes.
*/
/**
* struct iwl_fw_ini_region_cfg_fifos - meta data of fifos region
+ *
* @fid1: fifo id 1 - bitmap of lmac tx/rx fifos to include in the region
* @fid2: fifo id 2 - bitmap of umac rx fifos to include in the region.
* It is unused for tx.
/**
* struct iwl_fw_ini_region_cfg
+ *
* @region_id: ID of this dump configuration
* @region_type: &enum iwl_fw_ini_region_type
- * @num_regions: amount of regions in the address array.
+ * @domain: dump this region only if the specific domain is enabled
+ * &enum iwl_fw_ini_dbg_domain
* @name_len: name length
* @name: file name to use for this region
* @internal: used in case the region uses internal memory.
* @allocation_id: For DRAM type field substitutes for allocation_id
* @fifos: used in case of fifos region.
+ * @dhc_desc: dhc response descriptor.
+ * @notif_id_and_grp: dump this region only if the specific notification
+ * occurred.
* @offset: offset to use for each memory base address
* @start_addr: array of addresses.
*/
struct iwl_fw_ini_region_cfg {
__le32 region_id;
__le32 region_type;
+ __le32 domain;
__le32 name_len;
u8 name[IWL_FW_INI_MAX_NAME];
union {
struct iwl_fw_ini_region_cfg_internal internal;
__le32 allocation_id;
struct iwl_fw_ini_region_cfg_fifos fifos;
- };
+ struct iwl_fw_ini_region_cfg_dhc dhc_desc;
+ __le32 notif_id_and_grp;
+ }; /* FW_DEBUG_TLV_REGION_EXT_INT_PARAMS_API_U_VER_1 */
__le32 offset;
__le32 start_addr[];
-} __packed; /* FW_DEBUG_TLV_REGION_CONFIG_S */
+} __packed; /* FW_DEBUG_TLV_REGION_CONFIG_API_S_VER_1 */
/**
- * struct iwl_fw_ini_region_tlv - (IWL_FW_INI_TLV_TYPE_REGION_CFG)
- * DUMP sections define IDs and triggers that use those IDs TLV
+ * struct iwl_fw_ini_region_tlv - (IWL_UCODE_TLV_TYPE_REGIONS)
+ * defines memory regions to dump
+ *
* @header: header
* @num_regions: how many different region section and IDs are coming next
* @region_config: list of dump configurations
struct iwl_fw_ini_header header;
__le32 num_regions;
struct iwl_fw_ini_region_cfg region_config[];
-} __packed; /* FW_DEBUG_TLV_REGIONS_S_VER_1 */
+} __packed; /* FW_DEBUG_TLV_REGIONS_API_S_VER_1 */
/**
- * struct iwl_fw_ini_trigger - (IWL_FW_INI_TLV_TYPE_DUMP_CFG)
- * Region sections define IDs and triggers that use those IDs TLV
+ * struct iwl_fw_ini_trigger
*
- * @trigger_id: enum &iwl_fw_ini_tigger_id
+ * @trigger_id: &enum iwl_fw_ini_trigger_id
* @override_trig: determines how apply trigger in case a trigger with the
* same id is already in use. Using the first 2 bytes:
* Byte 0: if 0, override trigger configuration, otherwise use the
* existing trigger.
* @dump_delay: delay from trigger fire to dump, in usec
* @occurrences: max amount of times to be fired
+ * @reserved: to align to FW struct
* @ignore_consec: ignore consecutive triggers, in usec
* @force_restart: force FW restart
* @multi_dut: initiate debug dump data on several DUTs
__le32 override_trig;
__le32 dump_delay;
__le32 occurrences;
+ __le32 reserved;
__le32 ignore_consec;
__le32 force_restart;
__le32 multi_dut;
__le32 trigger_data;
__le32 num_regions;
__le32 data[];
-} __packed; /* FW_TLV_DEBUG_TRIGGER_CONFIG_S */
+} __packed; /* FW_TLV_DEBUG_TRIGGER_CONFIG_API_S_VER_1 */
/**
- * struct iwl_fw_ini_trigger_tlv - (IWL_FW_INI_TLV_TYPE_TRIGGERS_CFG)
- * DUMP sections define IDs and triggers that use those IDs TLV
+ * struct iwl_fw_ini_trigger_tlv - (IWL_UCODE_TLV_TYPE_TRIGGERS)
+ * Triggers that hold memory regions to dump in case a trigger fires
*
* @header: header
* @num_triggers: how many different triggers section and IDs are coming next
struct iwl_fw_ini_header header;
__le32 num_triggers;
struct iwl_fw_ini_trigger trigger_config[];
-} __packed; /* FW_TLV_DEBUG_TRIGGERS_S_VER_1 */
+} __packed; /* FW_TLV_DEBUG_TRIGGERS_API_S_VER_1 */
/**
* enum iwl_fw_ini_trigger_id
+ *
* @IWL_FW_TRIGGER_ID_FW_ASSERT: FW assert
* @IWL_FW_TRIGGER_ID_FW_HW_ERROR: HW assert
* @IWL_FW_TRIGGER_ID_FW_TFD_Q_HANG: TFD queue hang
* @IWL_FW_TRIGGER_ID_FW_DEBUG_HOST_TRIGGER: FW debug notification
- * @IWL_FW_TRIGGER_ID_FW_GENERIC_NOTIFOCATION: FW generic notification
+ * @IWL_FW_TRIGGER_ID_FW_GENERIC_NOTIFICATION: FW generic notification
* @IWL_FW_TRIGGER_ID_USER_TRIGGER: User trigger
+ * @IWL_FW_TRIGGER_ID_PERIODIC_TRIGGER: triggers periodically
* @IWL_FW_TRIGGER_ID_HOST_PEER_CLIENT_INACTIVITY: peer inactivity
* @IWL_FW_TRIGGER_ID_HOST_TX_LATENCY_THRESHOLD_CROSSED: TX latency
* threshold was crossed
/* FW triggers */
IWL_FW_TRIGGER_ID_FW_DEBUG_HOST_TRIGGER = 4,
- IWL_FW_TRIGGER_ID_FW_GENERIC_NOTIFOCATION = 5,
+ IWL_FW_TRIGGER_ID_FW_GENERIC_NOTIFICATION = 5,
/* User trigger */
IWL_FW_TRIGGER_ID_USER_TRIGGER = 6,
+ /* periodic uses the data field for the interval time */
+ IWL_FW_TRIGGER_ID_PERIODIC_TRIGGER = 7,
+
/* Host triggers */
- IWL_FW_TRIGGER_ID_HOST_PEER_CLIENT_INACTIVITY = 7,
- IWL_FW_TRIGGER_ID_HOST_TX_LATENCY_THRESHOLD_CROSSED = 8,
- IWL_FW_TRIGGER_ID_HOST_TX_RESPONSE_STATUS_FAILED = 9,
- IWL_FW_TRIGGER_ID_HOST_OS_REQ_DEAUTH_PEER = 10,
- IWL_FW_TRIGGER_ID_HOST_STOP_GO_REQUEST = 11,
- IWL_FW_TRIGGER_ID_HOST_START_GO_REQUEST = 12,
- IWL_FW_TRIGGER_ID_HOST_JOIN_GROUP_REQUEST = 13,
- IWL_FW_TRIGGER_ID_HOST_SCAN_START = 14,
- IWL_FW_TRIGGER_ID_HOST_SCAN_SUBMITTED = 15,
- IWL_FW_TRIGGER_ID_HOST_SCAN_PARAMS = 16,
- IWL_FW_TRIGGER_ID_HOST_CHECK_FOR_HANG = 17,
- IWL_FW_TRIGGER_ID_HOST_BAR_RECEIVED = 18,
- IWL_FW_TRIGGER_ID_HOST_AGG_TX_RESPONSE_STATUS_FAILED = 19,
- IWL_FW_TRIGGER_ID_HOST_EAPOL_TX_RESPONSE_FAILED = 20,
- IWL_FW_TRIGGER_ID_HOST_FAKE_TX_RESPONSE_SUSPECTED = 21,
- IWL_FW_TRIGGER_ID_HOST_AUTH_REQ_FROM_ASSOC_CLIENT = 22,
- IWL_FW_TRIGGER_ID_HOST_ROAM_COMPLETE = 23,
- IWL_FW_TRIGGER_ID_HOST_AUTH_ASSOC_FAST_FAILED = 24,
- IWL_FW_TRIGGER_ID_HOST_D3_START = 25,
- IWL_FW_TRIGGER_ID_HOST_D3_END = 26,
- IWL_FW_TRIGGER_ID_HOST_BSS_MISSED_BEACONS = 27,
- IWL_FW_TRIGGER_ID_HOST_P2P_CLIENT_MISSED_BEACONS = 28,
- IWL_FW_TRIGGER_ID_HOST_PEER_CLIENT_TX_FAILURES = 29,
- IWL_FW_TRIGGER_ID_HOST_TX_WFD_ACTION_FRAME_FAILED = 30,
- IWL_FW_TRIGGER_ID_HOST_AUTH_ASSOC_FAILED = 31,
- IWL_FW_TRIGGER_ID_HOST_SCAN_COMPLETE = 32,
- IWL_FW_TRIGGER_ID_HOST_SCAN_ABORT = 33,
- IWL_FW_TRIGGER_ID_HOST_NIC_ALIVE = 34,
- IWL_FW_TRIGGER_ID_HOST_CHANNEL_SWITCH_COMPLETE = 35,
+ IWL_FW_TRIGGER_ID_HOST_PEER_CLIENT_INACTIVITY = 8,
+ IWL_FW_TRIGGER_ID_HOST_TX_LATENCY_THRESHOLD_CROSSED = 9,
+ IWL_FW_TRIGGER_ID_HOST_TX_RESPONSE_STATUS_FAILED = 10,
+ IWL_FW_TRIGGER_ID_HOST_OS_REQ_DEAUTH_PEER = 11,
+ IWL_FW_TRIGGER_ID_HOST_STOP_GO_REQUEST = 12,
+ IWL_FW_TRIGGER_ID_HOST_START_GO_REQUEST = 13,
+ IWL_FW_TRIGGER_ID_HOST_JOIN_GROUP_REQUEST = 14,
+ IWL_FW_TRIGGER_ID_HOST_SCAN_START = 15,
+ IWL_FW_TRIGGER_ID_HOST_SCAN_SUBMITTED = 16,
+ IWL_FW_TRIGGER_ID_HOST_SCAN_PARAMS = 17,
+ IWL_FW_TRIGGER_ID_HOST_CHECK_FOR_HANG = 18,
+ IWL_FW_TRIGGER_ID_HOST_BAR_RECEIVED = 19,
+ IWL_FW_TRIGGER_ID_HOST_AGG_TX_RESPONSE_STATUS_FAILED = 20,
+ IWL_FW_TRIGGER_ID_HOST_EAPOL_TX_RESPONSE_FAILED = 21,
+ IWL_FW_TRIGGER_ID_HOST_FAKE_TX_RESPONSE_SUSPECTED = 22,
+ IWL_FW_TRIGGER_ID_HOST_AUTH_REQ_FROM_ASSOC_CLIENT = 23,
+ IWL_FW_TRIGGER_ID_HOST_ROAM_COMPLETE = 24,
+ IWL_FW_TRIGGER_ID_HOST_AUTH_ASSOC_FAST_FAILED = 25,
+ IWL_FW_TRIGGER_ID_HOST_D3_START = 26,
+ IWL_FW_TRIGGER_ID_HOST_D3_END = 27,
+ IWL_FW_TRIGGER_ID_HOST_BSS_MISSED_BEACONS = 28,
+ IWL_FW_TRIGGER_ID_HOST_P2P_CLIENT_MISSED_BEACONS = 29,
+ IWL_FW_TRIGGER_ID_HOST_PEER_CLIENT_TX_FAILURES = 30,
+ IWL_FW_TRIGGER_ID_HOST_TX_WFD_ACTION_FRAME_FAILED = 31,
+ IWL_FW_TRIGGER_ID_HOST_AUTH_ASSOC_FAILED = 32,
+ IWL_FW_TRIGGER_ID_HOST_SCAN_COMPLETE = 33,
+ IWL_FW_TRIGGER_ID_HOST_SCAN_ABORT = 34,
+ IWL_FW_TRIGGER_ID_HOST_NIC_ALIVE = 35,
+ IWL_FW_TRIGGER_ID_HOST_CHANNEL_SWITCH_COMPLETE = 36,
IWL_FW_TRIGGER_ID_NUM,
}; /* FW_DEBUG_TLV_TRIGGER_ID_E_VER_1 */
/**
* enum iwl_fw_ini_apply_point
+ *
* @IWL_FW_INI_APPLY_INVALID: invalid
* @IWL_FW_INI_APPLY_EARLY: pre loading FW
* @IWL_FW_INI_APPLY_AFTER_ALIVE: first cmd from host after alive
/**
* enum iwl_fw_ini_allocation_id
+ *
* @IWL_FW_INI_ALLOCATION_INVALID: invalid
* @IWL_FW_INI_ALLOCATION_ID_DBGC1: allocation meant for DBGC1 configuration
* @IWL_FW_INI_ALLOCATION_ID_DBGC2: allocation meant for DBGC2 configuration
/**
* enum iwl_fw_ini_buffer_location
+ *
* @IWL_FW_INI_LOCATION_INVALID: invalid
* @IWL_FW_INI_LOCATION_SRAM_PATH: SRAM location
* @IWL_FW_INI_LOCATION_DRAM_PATH: DRAM location
+ * @IWL_FW_INI_LOCATION_NPK_PATH: NPK location
*/
enum iwl_fw_ini_buffer_location {
IWL_FW_INI_LOCATION_INVALID,
IWL_FW_INI_LOCATION_SRAM_PATH,
IWL_FW_INI_LOCATION_DRAM_PATH,
+ IWL_FW_INI_LOCATION_NPK_PATH,
}; /* FW_DEBUG_TLV_BUFFER_LOCATION_E_VER_1 */
/**
* enum iwl_fw_ini_debug_flow
+ *
* @IWL_FW_INI_DEBUG_INVALID: invalid
* @IWL_FW_INI_DEBUG_DBTR_FLOW: undefined
* @IWL_FW_INI_DEBUG_TB2DTF_FLOW: undefined
/**
* enum iwl_fw_ini_region_type
+ *
* @IWL_FW_INI_REGION_INVALID: invalid
* @IWL_FW_INI_REGION_DEVICE_MEMORY: device internal memory
* @IWL_FW_INI_REGION_PERIPHERY_MAC: periphery registers of MAC
* @IWL_FW_INI_REGION_RXF: RX fifo
* @IWL_FW_INI_REGION_PAGING: paging memory
* @IWL_FW_INI_REGION_CSR: CSR registers
+ * @IWL_FW_INI_REGION_NOTIFICATION: FW notification data
+ * @IWL_FW_INI_REGION_DHC: dhc response to dump
* @IWL_FW_INI_REGION_NUM: number of region types
*/
enum iwl_fw_ini_region_type {
IWL_FW_INI_REGION_RXF,
IWL_FW_INI_REGION_PAGING,
IWL_FW_INI_REGION_CSR,
+ IWL_FW_INI_REGION_NOTIFICATION,
+ IWL_FW_INI_REGION_DHC,
IWL_FW_INI_REGION_NUM
}; /* FW_DEBUG_TLV_REGION_TYPE_E_VER_1 */
#define IWL_HE_HTC_LINK_ADAP_UNSOLICITED (2 << IWL_HE_HTC_LINK_ADAP_POS)
#define IWL_HE_HTC_LINK_ADAP_BOTH (3 << IWL_HE_HTC_LINK_ADAP_POS)
+/**
+ * struct iwl_he_sta_context_cmd_v1 - configure FW to work with HE AP
+ * @sta_id: STA id
+ * @tid_limit: max num of TIDs in TX HE-SU multi-TID agg
+ * 0 - bad value, 1 - multi-tid not supported, 2..8 - tid limit
+ * @reserved1: reserved byte for future use
+ * @reserved2: reserved byte for future use
+ * @flags: see %iwl_11ax_sta_ctxt_flags
+ * @ref_bssid_addr: reference BSSID used by the AP
+ * @reserved0: reserved 2 bytes for aligning the ref_bssid_addr field to 8 bytes
+ * @htc_flags: which features are supported in HTC
+ * @frag_flags: frag support in A-MSDU
+ * @frag_level: frag support level
+ * @frag_max_num: max num of "open" MSDUs in the receiver (in power of 2)
+ * @frag_min_size: min frag size (except last frag)
+ * @pkt_ext: optional, exists according to PPE-present bit in the HE-PHY capa
+ * @bss_color: 11ax AP ID that is used in the HE SIG-A to mark inter BSS frame
+ * @htc_trig_based_pkt_ext: default PE in 4us units
+ * @frame_time_rts_th: HE duration RTS threshold, in units of 32us
+ * @rand_alloc_ecwmin: random CWmin = 2**ECWmin-1
+ * @rand_alloc_ecwmax: random CWmax = 2**ECWmax-1
+ * @reserved3: reserved 2 bytes for future use
+ * @trig_based_txf: MU EDCA Parameter set for the trigger based traffic queues
+ */
+struct iwl_he_sta_context_cmd_v1 {
+ u8 sta_id;
+ u8 tid_limit;
+ u8 reserved1;
+ u8 reserved2;
+ __le32 flags;
+
+ /* The below fields are set via Multiple BSSID IE */
+ u8 ref_bssid_addr[6];
+ __le16 reserved0;
+
+ /* The below fields are set via HE-capabilities IE */
+ __le32 htc_flags;
+
+ u8 frag_flags;
+ u8 frag_level;
+ u8 frag_max_num;
+ u8 frag_min_size;
+
+ /* The below fields are set via PPE thresholds element */
+ struct iwl_he_pkt_ext pkt_ext;
+
+ /* The below fields are set via HE-Operation IE */
+ u8 bss_color;
+ u8 htc_trig_based_pkt_ext;
+ __le16 frame_time_rts_th;
+
+ /* Random access parameter set (i.e. RAPS) */
+ u8 rand_alloc_ecwmin;
+ u8 rand_alloc_ecwmax;
+ __le16 reserved3;
+
+ /* The below fields are set via MU EDCA parameter set element */
+ struct iwl_he_backoff_conf trig_based_txf[AC_NUM];
+} __packed; /* STA_CONTEXT_DOT11AX_API_S_VER_1 */
+
/**
* struct iwl_he_sta_context_cmd - configure FW to work with HE AP
* @sta_id: STA id
* @rand_alloc_ecwmax: random CWmax = 2**ECWmax-1
* @reserved3: reserved byte for future use
* @trig_based_txf: MU EDCA Parameter set for the trigger based traffic queues
+ * @max_bssid_indicator: indicator of the max bssid supported on the associated
+ * bss
+ * @bssid_index: index of the associated VAP
+ * @ema_ap: AP supports enhanced Multi BSSID advertisement
+ * @profile_periodicity: number of Beacon periods that are needed to receive the
+ * complete VAPs info
+ * @bssid_count: actual number of VAPs in the MultiBSS Set
+ * @reserved4: alignment
*/
struct iwl_he_sta_context_cmd {
u8 sta_id;
/* The below fields are set via MU EDCA parameter set element */
struct iwl_he_backoff_conf trig_based_txf[AC_NUM];
-} __packed; /* STA_CONTEXT_DOT11AX_API_S */
+
+ u8 max_bssid_indicator;
+ u8 bssid_index;
+ u8 ema_ap;
+ u8 profile_periodicity;
+ u8 bssid_count;
+ u8 reserved4[3];
+} __packed; /* STA_CONTEXT_DOT11AX_API_S_VER_2 */
/**
* struct iwl_he_monitor_cmd - configure air sniffer for HE
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright(C) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright(C) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
__le32 rx_chains;
} __packed; /* REGULATORY_NVM_GET_INFO_PHY_SKU_SECTION_S_VER_1 */
-#define IWL_NUM_CHANNELS (51)
+#define IWL_NUM_CHANNELS_V1 51
+#define IWL_NUM_CHANNELS 110
/**
* struct iwl_nvm_get_info_regulatory - regulatory information
* @channel_profile: regulatory data of this channel
* @reserved: reserved
*/
-struct iwl_nvm_get_info_regulatory {
+struct iwl_nvm_get_info_regulatory_v1 {
__le32 lar_enabled;
- __le16 channel_profile[IWL_NUM_CHANNELS];
+ __le16 channel_profile[IWL_NUM_CHANNELS_V1];
__le16 reserved;
} __packed; /* REGULATORY_NVM_GET_INFO_REGULATORY_S_VER_1 */
+/**
+ * struct iwl_nvm_get_info_regulatory - regulatory information
+ * @lar_enabled: is LAR enabled
+ * @n_channels: number of valid channels in the array
+ * @channel_profile: regulatory data of this channel
+ */
+struct iwl_nvm_get_info_regulatory {
+ __le32 lar_enabled;
+ __le32 n_channels;
+ __le32 channel_profile[IWL_NUM_CHANNELS];
+} __packed; /* REGULATORY_NVM_GET_INFO_REGULATORY_S_VER_2 */
+
+/**
+ * struct iwl_nvm_get_info_rsp_v3 - response to get NVM data
+ * @general: general NVM data
+ * @mac_sku: data relating to MAC sku
+ * @phy_sku: data relating to PHY sku
+ * @regulatory: regulatory data
+ */
+struct iwl_nvm_get_info_rsp_v3 {
+ struct iwl_nvm_get_info_general general;
+ struct iwl_nvm_get_info_sku mac_sku;
+ struct iwl_nvm_get_info_phy phy_sku;
+ struct iwl_nvm_get_info_regulatory_v1 regulatory;
+} __packed; /* REGULATORY_NVM_GET_INFO_RSP_API_S_VER_3 */
+
/**
* struct iwl_nvm_get_info_rsp - response to get NVM data
* @general: general NVM data
struct iwl_nvm_get_info_sku mac_sku;
struct iwl_nvm_get_info_phy phy_sku;
struct iwl_nvm_get_info_regulatory regulatory;
-} __packed; /* REGULATORY_NVM_GET_INFO_RSP_API_S_VER_3 */
+} __packed; /* REGULATORY_NVM_GET_INFO_RSP_API_S_VER_4 */
/**
* struct iwl_nvm_access_complete_cmd - NVM_ACCESS commands are completed
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#define IWL_RX_DESC_SIZE_V1 offsetofend(struct iwl_rx_mpdu_desc, v1)
-#define IWL_CD_STTS_OPTIMIZED_POS 0
-#define IWL_CD_STTS_OPTIMIZED_MSK 0x01
-#define IWL_CD_STTS_TRANSFER_STATUS_POS 1
-#define IWL_CD_STTS_TRANSFER_STATUS_MSK 0x0E
-#define IWL_CD_STTS_WIFI_STATUS_POS 4
-#define IWL_CD_STTS_WIFI_STATUS_MSK 0xF0
-
#define RX_NO_DATA_CHAIN_A_POS 0
#define RX_NO_DATA_CHAIN_A_MSK (0xff << RX_NO_DATA_CHAIN_A_POS)
#define RX_NO_DATA_CHAIN_B_POS 8
__le32 rx_vec[2];
} __packed; /* RX_NO_DATA_NTFY_API_S_VER_1 */
-/**
- * enum iwl_completion_desc_transfer_status - transfer status (bits 1-3)
- * @IWL_CD_STTS_UNUSED: unused
- * @IWL_CD_STTS_UNUSED_2: unused
- * @IWL_CD_STTS_END_TRANSFER: successful transfer complete.
- * In sniffer mode, when split is used, set in last CD completion. (RX)
- * @IWL_CD_STTS_OVERFLOW: In sniffer mode, when using split - used for
- * all CD completion. (RX)
- * @IWL_CD_STTS_ABORTED: CR abort / close flow. (RX)
- * @IWL_CD_STTS_ERROR: general error (RX)
- */
-enum iwl_completion_desc_transfer_status {
- IWL_CD_STTS_UNUSED,
- IWL_CD_STTS_UNUSED_2,
- IWL_CD_STTS_END_TRANSFER,
- IWL_CD_STTS_OVERFLOW,
- IWL_CD_STTS_ABORTED,
- IWL_CD_STTS_ERROR,
-};
-
-/**
- * enum iwl_completion_desc_wifi_status - wifi status (bits 4-7)
- * @IWL_CD_STTS_VALID: the packet is valid (RX)
- * @IWL_CD_STTS_FCS_ERR: frame check sequence error (RX)
- * @IWL_CD_STTS_SEC_KEY_ERR: error handling the security key of rx (RX)
- * @IWL_CD_STTS_DECRYPTION_ERR: error decrypting the frame (RX)
- * @IWL_CD_STTS_DUP: duplicate packet (RX)
- * @IWL_CD_STTS_ICV_MIC_ERR: MIC error (RX)
- * @IWL_CD_STTS_INTERNAL_SNAP_ERR: problems removing the snap (RX)
- * @IWL_CD_STTS_SEC_PORT_FAIL: security port fail (RX)
- * @IWL_CD_STTS_BA_OLD_SN: block ack received old SN (RX)
- * @IWL_CD_STTS_QOS_NULL: QoS null packet (RX)
- * @IWL_CD_STTS_MAC_HDR_ERR: MAC header conversion error (RX)
- * @IWL_CD_STTS_MAX_RETRANS: reached max number of retransmissions (TX)
- * @IWL_CD_STTS_EX_LIFETIME: exceeded lifetime (TX)
- * @IWL_CD_STTS_NOT_USED: completed but not used (RX)
- * @IWL_CD_STTS_REPLAY_ERR: pn check failed, replay error (RX)
- */
-enum iwl_completion_desc_wifi_status {
- IWL_CD_STTS_VALID,
- IWL_CD_STTS_FCS_ERR,
- IWL_CD_STTS_SEC_KEY_ERR,
- IWL_CD_STTS_DECRYPTION_ERR,
- IWL_CD_STTS_DUP,
- IWL_CD_STTS_ICV_MIC_ERR,
- IWL_CD_STTS_INTERNAL_SNAP_ERR,
- IWL_CD_STTS_SEC_PORT_FAIL,
- IWL_CD_STTS_BA_OLD_SN,
- IWL_CD_STTS_QOS_NULL,
- IWL_CD_STTS_MAC_HDR_ERR,
- IWL_CD_STTS_MAX_RETRANS,
- IWL_CD_STTS_EX_LIFETIME,
- IWL_CD_STTS_NOT_USED,
- IWL_CD_STTS_REPLAY_ERR,
-};
-
struct iwl_frame_release {
u8 baid;
u8 reserved;
__le32 reserved;
} __packed; /* SCAN_COMPLETE_NTF_UMAC_API_S_VER_1 */
-#define SCAN_OFFLOAD_MATCHING_CHANNELS_LEN 5
+#define SCAN_OFFLOAD_MATCHING_CHANNELS_LEN_V1 5
+#define SCAN_OFFLOAD_MATCHING_CHANNELS_LEN 7
+
+/**
+ * struct iwl_scan_offload_profile_match_v1 - match information
+ * @bssid: matched bssid
+ * @reserved: reserved
+ * @channel: channel where the match occurred
+ * @energy: energy
+ * @matching_feature: feature matches
+ * @matching_channels: bitmap of channels that matched, referencing
+ * the channels passed in the scan offload request.
+ */
+struct iwl_scan_offload_profile_match_v1 {
+ u8 bssid[ETH_ALEN];
+ __le16 reserved;
+ u8 channel;
+ u8 energy;
+ u8 matching_feature;
+ u8 matching_channels[SCAN_OFFLOAD_MATCHING_CHANNELS_LEN_V1];
+} __packed; /* SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S_VER_1 */
+
+/**
+ * struct iwl_scan_offload_profiles_query_v1 - match results query response
+ * @matched_profiles: bitmap of matched profiles, referencing the
+ * matches passed in the scan offload request
+ * @last_scan_age: age of the last offloaded scan
+ * @n_scans_done: number of offloaded scans done
+ * @gp2_d0u: GP2 when D0U occurred
+ * @gp2_invoked: GP2 when scan offload was invoked
+ * @resume_while_scanning: not used
+ * @self_recovery: obsolete
+ * @reserved: reserved
+ * @matches: array of match information, one for each match
+ */
+struct iwl_scan_offload_profiles_query_v1 {
+ __le32 matched_profiles;
+ __le32 last_scan_age;
+ __le32 n_scans_done;
+ __le32 gp2_d0u;
+ __le32 gp2_invoked;
+ u8 resume_while_scanning;
+ u8 self_recovery;
+ __le16 reserved;
+ struct iwl_scan_offload_profile_match_v1 matches[IWL_SCAN_MAX_PROFILES];
+} __packed; /* SCAN_OFFLOAD_PROFILES_QUERY_RSP_S_VER_2 */
+
/**
* struct iwl_scan_offload_profile_match - match information
* @bssid: matched bssid
* @energy: energy
* @matching_feature: feature matches
* @matching_channels: bitmap of channels that matched, referencing
- * the channels passed in tue scan offload request
+ * the channels passed in the scan offload request.
*/
struct iwl_scan_offload_profile_match {
u8 bssid[ETH_ALEN];
u8 energy;
u8 matching_feature;
u8 matching_channels[SCAN_OFFLOAD_MATCHING_CHANNELS_LEN];
-} __packed; /* SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S_VER_1 */
+} __packed; /* SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S_VER_2 */
/**
* struct iwl_scan_offload_profiles_query - match results query response
u8 self_recovery;
__le16 reserved;
struct iwl_scan_offload_profile_match matches[IWL_SCAN_MAX_PROFILES];
-} __packed; /* SCAN_OFFLOAD_PROFILES_QUERY_RSP_S_VER_2 */
+} __packed; /* SCAN_OFFLOAD_PROFILES_QUERY_RSP_S_VER_3 */
/**
* struct iwl_umac_scan_iter_complete_notif - notifies end of scanning iteration
}
static struct iwl_fw_error_dump_file *
-_iwl_fw_error_dump(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_dump_ptrs *fw_error_dump)
+iwl_fw_error_dump_file(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dump_ptrs *fw_error_dump)
{
struct iwl_fw_error_dump_file *dump_file;
struct iwl_fw_error_dump_data *dump_data;
if (fifo_len) {
iwl_fw_dump_rxf(fwrt, &dump_data);
iwl_fw_dump_txf(fwrt, &dump_data);
- if (radio_len)
- iwl_read_radio_regs(fwrt, &dump_data);
}
+ if (radio_len)
+ iwl_read_radio_regs(fwrt, &dump_data);
+
if (iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_ERROR_INFO) &&
fwrt->dump.desc) {
dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_ERROR_INFO);
{
struct iwl_fw_ini_error_dump_range *range = range_ptr;
__le32 *val = range->data;
- u32 addr, prph_val, offset = le32_to_cpu(reg->offset);
+ u32 prph_val;
+ u32 addr = le32_to_cpu(reg->start_addr[idx]) + le32_to_cpu(reg->offset);
int i;
- range->start_addr = reg->start_addr[idx];
+ range->start_addr = cpu_to_le64(addr);
range->range_data_size = reg->internal.range_data_size;
for (i = 0; i < le32_to_cpu(reg->internal.range_data_size); i += 4) {
- addr = le32_to_cpu(range->start_addr) + i;
- prph_val = iwl_read_prph(fwrt->trans, addr + offset);
+ prph_val = iwl_read_prph(fwrt->trans, addr + i);
if (prph_val == 0x5a5a5a5a)
return -EBUSY;
*val++ = cpu_to_le32(prph_val);
{
struct iwl_fw_ini_error_dump_range *range = range_ptr;
__le32 *val = range->data;
- u32 addr, offset = le32_to_cpu(reg->offset);
+ u32 addr = le32_to_cpu(reg->start_addr[idx]) + le32_to_cpu(reg->offset);
int i;
- range->start_addr = reg->start_addr[idx];
+ range->start_addr = cpu_to_le64(addr);
range->range_data_size = reg->internal.range_data_size;
- for (i = 0; i < le32_to_cpu(reg->internal.range_data_size); i += 4) {
- addr = le32_to_cpu(range->start_addr) + i;
- *val++ = cpu_to_le32(iwl_trans_read32(fwrt->trans,
- addr + offset));
- }
+ for (i = 0; i < le32_to_cpu(reg->internal.range_data_size); i += 4)
+ *val++ = cpu_to_le32(iwl_trans_read32(fwrt->trans, addr + i));
return sizeof(*range) + le32_to_cpu(range->range_data_size);
}
void *range_ptr, int idx)
{
struct iwl_fw_ini_error_dump_range *range = range_ptr;
- u32 addr = le32_to_cpu(range->start_addr);
- u32 offset = le32_to_cpu(reg->offset);
+ u32 addr = le32_to_cpu(reg->start_addr[idx]) + le32_to_cpu(reg->offset);
- range->start_addr = reg->start_addr[idx];
+ range->start_addr = cpu_to_le64(addr);
range->range_data_size = reg->internal.range_data_size;
- iwl_trans_read_mem_bytes(fwrt->trans, addr + offset, range->data,
+ iwl_trans_read_mem_bytes(fwrt->trans, addr, range->data,
le32_to_cpu(reg->internal.range_data_size));
return sizeof(*range) + le32_to_cpu(range->range_data_size);
struct iwl_fw_ini_error_dump_range *range = range_ptr;
u32 page_size = fwrt->trans->init_dram.paging[idx].size;
- range->start_addr = cpu_to_le32(idx);
+ range->start_addr = cpu_to_le64(idx);
range->range_data_size = cpu_to_le32(page_size);
memcpy(range->data, fwrt->trans->init_dram.paging[idx].block,
page_size);
dma_addr_t addr = fwrt->fw_paging_db[idx].fw_paging_phys;
u32 page_size = fwrt->fw_paging_db[idx].fw_paging_size;
- range->start_addr = cpu_to_le32(idx);
+ range->start_addr = cpu_to_le64(idx);
range->range_data_size = cpu_to_le32(page_size);
dma_sync_single_for_cpu(fwrt->trans->dev, addr, page_size,
DMA_BIDIRECTIONAL);
if (start_addr == 0x5a5a5a5a)
return -EBUSY;
- range->start_addr = cpu_to_le32(start_addr);
+ range->start_addr = cpu_to_le64(start_addr);
range->range_data_size = cpu_to_le32(fwrt->trans->fw_mon[idx].size);
memcpy(range->data, fwrt->trans->fw_mon[idx].block,
{
struct iwl_fw_ini_fifo_error_dump_range *range = range_ptr;
struct iwl_ini_txf_iter_data *iter;
+ struct iwl_fw_ini_error_dump_register *reg_dump = (void *)range->data;
u32 offs = le32_to_cpu(reg->offset), addr;
u32 registers_size =
- le32_to_cpu(reg->fifos.num_of_registers) * sizeof(__le32);
- __le32 *val = range->data;
+ le32_to_cpu(reg->fifos.num_of_registers) * sizeof(*reg_dump);
+ __le32 *data;
unsigned long flags;
int i;
iwl_write_prph_no_grab(fwrt->trans, TXF_LARC_NUM + offs, iter->fifo);
- /* read txf registers */
+ /*
+ * read txf registers. for each register, write to the dump the
+ * register address and its value
+ */
for (i = 0; i < le32_to_cpu(reg->fifos.num_of_registers); i++) {
addr = le32_to_cpu(reg->start_addr[i]) + offs;
- *val++ = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans, addr));
+ reg_dump->addr = cpu_to_le32(addr);
+ reg_dump->data = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans,
+ addr));
+
+ reg_dump++;
}
if (reg->fifos.header_only) {
/* Read FIFO */
addr = TXF_READ_MODIFY_DATA + offs;
- for (i = 0; i < iter->fifo_size; i += sizeof(__le32))
- *val++ = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans, addr));
+ data = (void *)reg_dump;
+ for (i = 0; i < iter->fifo_size; i += sizeof(*data))
+ *data++ = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans, addr));
out:
iwl_trans_release_nic_access(fwrt->trans, &flags);
{
struct iwl_fw_ini_fifo_error_dump_range *range = range_ptr;
struct iwl_ini_rxf_data rxf_data;
+ struct iwl_fw_ini_error_dump_register *reg_dump = (void *)range->data;
u32 offs = le32_to_cpu(reg->offset), addr;
u32 registers_size =
- le32_to_cpu(reg->fifos.num_of_registers) * sizeof(__le32);
- __le32 *val = range->data;
+ le32_to_cpu(reg->fifos.num_of_registers) * sizeof(*reg_dump);
+ __le32 *data;
unsigned long flags;
int i;
if (!iwl_trans_grab_nic_access(fwrt->trans, &flags))
return -EBUSY;
- offs += rxf_data.offset;
-
range->fifo_num = cpu_to_le32(rxf_data.fifo_num);
range->num_of_registers = reg->fifos.num_of_registers;
range->range_data_size = cpu_to_le32(rxf_data.size + registers_size);
- /* read rxf registers */
+ /*
+ * read rxf registers. for each register, write to the dump the
+ * register address and its value
+ */
for (i = 0; i < le32_to_cpu(reg->fifos.num_of_registers); i++) {
addr = le32_to_cpu(reg->start_addr[i]) + offs;
- *val++ = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans, addr));
+ reg_dump->addr = cpu_to_le32(addr);
+ reg_dump->data = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans,
+ addr));
+
+ reg_dump++;
}
if (reg->fifos.header_only) {
goto out;
}
+ /*
+ * region registers have absolute values, so apply the rxf offset
+ * after reading the registers
+ */
+ offs += rxf_data.offset;
+
/* Lock fence */
iwl_write_prph_no_grab(fwrt->trans, RXF_SET_FENCE_MODE + offs, 0x1);
/* Set fence pointer to the same place like WR pointer */
/* Read FIFO */
addr = RXF_FIFO_RD_FENCE_INC + offs;
- for (i = 0; i < rxf_data.size; i += sizeof(__le32))
- *val++ = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans, addr));
+ data = (void *)reg_dump;
+ for (i = 0; i < rxf_data.size; i += sizeof(*data))
+ *data++ = cpu_to_le32(iwl_read_prph_no_grab(fwrt->trans, addr));
out:
iwl_trans_release_nic_access(fwrt->trans, &flags);
{
struct iwl_fw_ini_error_dump *dump = data;
+ dump->header.version = cpu_to_le32(IWL_INI_DUMP_MEM_VER);
+
return dump->ranges;
}
static void
-*iwl_dump_ini_mon_dram_fill_header(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_ini_region_cfg *reg,
- void *data)
+*iwl_dump_ini_mon_fill_header(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_ini_region_cfg *reg,
+ struct iwl_fw_ini_monitor_dump *data,
+ u32 write_ptr_addr, u32 write_ptr_msk,
+ u32 cycle_cnt_addr, u32 cycle_cnt_msk)
{
- struct iwl_fw_ini_monitor_dram_dump *mon_dump = (void *)data;
u32 write_ptr, cycle_cnt;
unsigned long flags;
if (!iwl_trans_grab_nic_access(fwrt->trans, &flags)) {
- IWL_ERR(fwrt, "Failed to get DRAM monitor header\n");
+ IWL_ERR(fwrt, "Failed to get monitor header\n");
return NULL;
}
- write_ptr = iwl_read_umac_prph_no_grab(fwrt->trans,
- MON_BUFF_WRPTR_VER2);
- cycle_cnt = iwl_read_umac_prph_no_grab(fwrt->trans,
- MON_BUFF_CYCLE_CNT_VER2);
+
+ write_ptr = iwl_read_prph_no_grab(fwrt->trans, write_ptr_addr);
+ cycle_cnt = iwl_read_prph_no_grab(fwrt->trans, cycle_cnt_addr);
+
iwl_trans_release_nic_access(fwrt->trans, &flags);
- mon_dump->write_ptr = cpu_to_le32(write_ptr);
- mon_dump->cycle_cnt = cpu_to_le32(cycle_cnt);
+ data->header.version = cpu_to_le32(IWL_INI_DUMP_MONITOR_VER);
+ data->write_ptr = cpu_to_le32(write_ptr & write_ptr_msk);
+ data->cycle_cnt = cpu_to_le32(cycle_cnt & cycle_cnt_msk);
+
+ return data->ranges;
+}
+
+static void
+*iwl_dump_ini_mon_dram_fill_header(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_ini_region_cfg *reg,
+ void *data)
+{
+ struct iwl_fw_ini_monitor_dump *mon_dump = (void *)data;
+ u32 write_ptr_addr, write_ptr_msk, cycle_cnt_addr, cycle_cnt_msk;
+
+ switch (fwrt->trans->cfg->device_family) {
+ case IWL_DEVICE_FAMILY_9000:
+ case IWL_DEVICE_FAMILY_22000:
+ write_ptr_addr = MON_BUFF_WRPTR_VER2;
+ write_ptr_msk = -1;
+ cycle_cnt_addr = MON_BUFF_CYCLE_CNT_VER2;
+ cycle_cnt_msk = -1;
+ break;
+ default:
+ IWL_ERR(fwrt, "Unsupported device family %d\n",
+ fwrt->trans->cfg->device_family);
+ return NULL;
+ }
+
+ return iwl_dump_ini_mon_fill_header(fwrt, reg, mon_dump, write_ptr_addr,
+ write_ptr_msk, cycle_cnt_addr,
+ cycle_cnt_msk);
+}
+
+static void
+*iwl_dump_ini_mon_smem_fill_header(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_ini_region_cfg *reg,
+ void *data)
+{
+ struct iwl_fw_ini_monitor_dump *mon_dump = (void *)data;
+ const struct iwl_cfg *cfg = fwrt->trans->cfg;
+
+ if (fwrt->trans->cfg->device_family != IWL_DEVICE_FAMILY_9000 &&
+ fwrt->trans->cfg->device_family != IWL_DEVICE_FAMILY_22000) {
+ IWL_ERR(fwrt, "Unsupported device family %d\n",
+ fwrt->trans->cfg->device_family);
+ return NULL;
+ }
+
+ return iwl_dump_ini_mon_fill_header(fwrt, reg, mon_dump,
+ cfg->fw_mon_smem_write_ptr_addr,
+ cfg->fw_mon_smem_write_ptr_msk,
+ cfg->fw_mon_smem_cycle_cnt_ptr_addr,
+ cfg->fw_mon_smem_cycle_cnt_ptr_msk);
- return mon_dump->ranges;
}
static void *iwl_dump_ini_fifo_fill_header(struct iwl_fw_runtime *fwrt,
{
struct iwl_fw_ini_fifo_error_dump *dump = data;
+ dump->header.version = cpu_to_le32(IWL_INI_DUMP_FIFO_VER);
+
return dump->ranges;
}
static u32 iwl_dump_ini_mon_dram_get_size(struct iwl_fw_runtime *fwrt,
struct iwl_fw_ini_region_cfg *reg)
{
- u32 size = sizeof(struct iwl_fw_ini_monitor_dram_dump);
+ u32 size = sizeof(struct iwl_fw_ini_monitor_dump) +
+ sizeof(struct iwl_fw_ini_error_dump_range);
if (fwrt->trans->num_blocks)
size += fwrt->trans->fw_mon[0].size;
return size;
}
+static u32 iwl_dump_ini_mon_smem_get_size(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_ini_region_cfg *reg)
+{
+ return sizeof(struct iwl_fw_ini_monitor_dump) +
+ iwl_dump_ini_mem_ranges(fwrt, reg) *
+ (sizeof(struct iwl_fw_ini_error_dump_range) +
+ le32_to_cpu(reg->internal.range_data_size));
+}
+
static u32 iwl_dump_ini_txf_get_size(struct iwl_fw_runtime *fwrt,
struct iwl_fw_ini_region_cfg *reg)
{
void *fifo_iter = fwrt->dump.fifo_iter;
u32 size = 0;
u32 fifo_hdr = sizeof(struct iwl_fw_ini_fifo_error_dump_range) +
- le32_to_cpu(reg->fifos.num_of_registers) * sizeof(__le32);
+ le32_to_cpu(reg->fifos.num_of_registers) * sizeof(__le32) * 2;
fwrt->dump.fifo_iter = &iter;
while (iwl_ini_txf_iter(fwrt, reg)) {
struct iwl_ini_rxf_data rx_data;
u32 size = sizeof(struct iwl_fw_ini_fifo_error_dump) +
sizeof(struct iwl_fw_ini_fifo_error_dump_range) +
- le32_to_cpu(reg->fifos.num_of_registers) * sizeof(__le32);
+ le32_to_cpu(reg->fifos.num_of_registers) * sizeof(__le32) * 2;
if (reg->fifos.header_only)
return size;
* @fwrt: fw runtime struct.
* @data: dump memory data.
* @reg: region to copy to the dump.
+ * @ops: memory dump operations.
*/
static void
iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt,
- enum iwl_fw_ini_region_type type,
struct iwl_fw_error_dump_data **data,
struct iwl_fw_ini_region_cfg *reg,
struct iwl_dump_ini_mem_ops *ops)
{
struct iwl_fw_ini_error_dump_header *header = (void *)(*data)->data;
+ u32 num_of_ranges, i, type = le32_to_cpu(reg->region_type);
void *range;
- u32 num_of_ranges, i;
if (WARN_ON(!ops || !ops->get_num_of_ranges || !ops->get_size ||
!ops->fill_mem_hdr || !ops->fill_range))
(*data)->type = cpu_to_le32(type | INI_DUMP_BIT);
(*data)->len = cpu_to_le32(ops->get_size(fwrt, reg));
+ header->region_id = reg->region_id;
header->num_of_ranges = cpu_to_le32(num_of_ranges);
header->name_len = cpu_to_le32(min_t(int, IWL_FW_INI_MAX_NAME,
le32_to_cpu(reg->name_len)));
if (!range) {
IWL_ERR(fwrt, "Failed to fill region header: id=%d, type=%d\n",
le32_to_cpu(reg->region_id), type);
+ memset(*data, 0, le32_to_cpu((*data)->len));
return;
}
if (range_size < 0) {
IWL_ERR(fwrt, "Failed to dump region: id=%d, type=%d\n",
le32_to_cpu(reg->region_id), type);
+ memset(*data, 0, le32_to_cpu((*data)->len));
return;
}
range = range + range_size;
for (i = 0; i < le32_to_cpu(trigger->num_regions); i++) {
u32 reg_id = le32_to_cpu(trigger->data[i]);
struct iwl_fw_ini_region_cfg *reg;
- enum iwl_fw_ini_region_type type;
if (WARN_ON(reg_id >= ARRAY_SIZE(fwrt->dump.active_regs)))
continue;
if (WARN(!reg, "Unassigned region %d\n", reg_id))
continue;
- type = le32_to_cpu(reg->region_type);
- switch (type) {
+ switch (le32_to_cpu(reg->region_type)) {
case IWL_FW_INI_REGION_DEVICE_MEMORY:
case IWL_FW_INI_REGION_PERIPHERY_MAC:
case IWL_FW_INI_REGION_PERIPHERY_PHY:
case IWL_FW_INI_REGION_PERIPHERY_AUX:
- case IWL_FW_INI_REGION_INTERNAL_BUFFER:
case IWL_FW_INI_REGION_CSR:
size += hdr_len + iwl_dump_ini_mem_get_size(fwrt, reg);
break;
case IWL_FW_INI_REGION_RXF:
size += hdr_len + iwl_dump_ini_rxf_get_size(fwrt, reg);
break;
- case IWL_FW_INI_REGION_PAGING: {
+ case IWL_FW_INI_REGION_PAGING:
size += hdr_len;
if (iwl_fw_dbg_is_paging_enabled(fwrt)) {
size += iwl_dump_ini_paging_get_size(fwrt, reg);
reg);
}
break;
- }
case IWL_FW_INI_REGION_DRAM_BUFFER:
if (!fwrt->trans->num_blocks)
break;
size += hdr_len +
iwl_dump_ini_mon_dram_get_size(fwrt, reg);
break;
+ case IWL_FW_INI_REGION_INTERNAL_BUFFER:
+ size += hdr_len +
+ iwl_dump_ini_mon_smem_get_size(fwrt, reg);
+ break;
case IWL_FW_INI_REGION_DRAM_IMR:
/* Undefined yet */
default:
for (i = 0; i < num; i++) {
u32 reg_id = le32_to_cpu(trigger->data[i]);
- enum iwl_fw_ini_region_type type;
struct iwl_fw_ini_region_cfg *reg;
struct iwl_dump_ini_mem_ops ops;
if (!reg)
continue;
- type = le32_to_cpu(reg->region_type);
- switch (type) {
+ /* currently the driver supports always on domain only */
+ if (le32_to_cpu(reg->domain) != IWL_FW_INI_DBG_DOMAIN_ALWAYS_ON)
+ continue;
+
+ switch (le32_to_cpu(reg->region_type)) {
case IWL_FW_INI_REGION_DEVICE_MEMORY:
- case IWL_FW_INI_REGION_INTERNAL_BUFFER:
ops.get_num_of_ranges = iwl_dump_ini_mem_ranges;
ops.get_size = iwl_dump_ini_mem_get_size;
ops.fill_mem_hdr = iwl_dump_ini_mem_fill_header;
ops.fill_range = iwl_dump_ini_dev_mem_iter;
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
break;
case IWL_FW_INI_REGION_PERIPHERY_MAC:
case IWL_FW_INI_REGION_PERIPHERY_PHY:
ops.get_size = iwl_dump_ini_mem_get_size;
ops.fill_mem_hdr = iwl_dump_ini_mem_fill_header;
ops.fill_range = iwl_dump_ini_prph_iter;
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
break;
case IWL_FW_INI_REGION_DRAM_BUFFER:
ops.get_num_of_ranges = iwl_dump_ini_mon_dram_ranges;
ops.get_size = iwl_dump_ini_mon_dram_get_size;
ops.fill_mem_hdr = iwl_dump_ini_mon_dram_fill_header;
ops.fill_range = iwl_dump_ini_mon_dram_iter;
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
+ break;
+ case IWL_FW_INI_REGION_INTERNAL_BUFFER:
+ ops.get_num_of_ranges = iwl_dump_ini_mem_ranges;
+ ops.get_size = iwl_dump_ini_mon_smem_get_size;
+ ops.fill_mem_hdr = iwl_dump_ini_mon_smem_fill_header;
+ ops.fill_range = iwl_dump_ini_dev_mem_iter;
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
break;
- case IWL_FW_INI_REGION_PAGING: {
+ case IWL_FW_INI_REGION_PAGING:
ops.fill_mem_hdr = iwl_dump_ini_mem_fill_header;
if (iwl_fw_dbg_is_paging_enabled(fwrt)) {
ops.get_num_of_ranges =
ops.fill_range = iwl_dump_ini_paging_gen2_iter;
}
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
break;
- }
case IWL_FW_INI_REGION_TXF: {
struct iwl_ini_txf_iter_data iter = { .init = true };
void *fifo_iter = fwrt->dump.fifo_iter;
ops.get_size = iwl_dump_ini_txf_get_size;
ops.fill_mem_hdr = iwl_dump_ini_fifo_fill_header;
ops.fill_range = iwl_dump_ini_txf_iter;
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
fwrt->dump.fifo_iter = fifo_iter;
break;
}
ops.get_size = iwl_dump_ini_rxf_get_size;
ops.fill_mem_hdr = iwl_dump_ini_fifo_fill_header;
ops.fill_range = iwl_dump_ini_rxf_iter;
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
break;
case IWL_FW_INI_REGION_CSR:
ops.get_num_of_ranges = iwl_dump_ini_mem_ranges;
ops.get_size = iwl_dump_ini_mem_get_size;
ops.fill_mem_hdr = iwl_dump_ini_mem_fill_header;
ops.fill_range = iwl_dump_ini_csr_iter;
- iwl_dump_ini_mem(fwrt, type, data, reg, &ops);
+ iwl_dump_ini_mem(fwrt, data, reg, &ops);
break;
case IWL_FW_INI_REGION_DRAM_IMR:
/* This is undefined yet */
}
static struct iwl_fw_error_dump_file *
-_iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_dump_ptrs *fw_error_dump)
+iwl_fw_error_ini_dump_file(struct iwl_fw_runtime *fwrt)
{
- int size, id = le32_to_cpu(fwrt->dump.desc->trig_desc.type);
+ int size;
struct iwl_fw_error_dump_data *dump_data;
struct iwl_fw_error_dump_file *dump_file;
struct iwl_fw_ini_trigger *trigger;
-
- if (id == FW_DBG_TRIGGER_FW_ASSERT)
- id = IWL_FW_TRIGGER_ID_FW_ASSERT;
+ enum iwl_fw_ini_trigger_id id = fwrt->dump.ini_trig_id;
if (!iwl_fw_ini_trigger_on(fwrt, id))
return NULL;
trigger = fwrt->dump.active_trigs[id].trig;
- size = sizeof(*dump_file);
- size += iwl_fw_ini_get_trigger_len(fwrt, trigger);
-
+ size = iwl_fw_ini_get_trigger_len(fwrt, trigger);
if (!size)
return NULL;
+ size += sizeof(*dump_file);
+
dump_file = vzalloc(size);
if (!dump_file)
return NULL;
- fw_error_dump->fwrt_ptr = dump_file;
-
dump_file->barker = cpu_to_le32(IWL_FW_ERROR_DUMP_BARKER);
dump_data = (void *)dump_file->data;
dump_file->file_len = cpu_to_le32(size);
return dump_file;
}
-void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
+static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
{
- struct iwl_fw_dump_ptrs *fw_error_dump;
+ struct iwl_fw_dump_ptrs fw_error_dump = {};
struct iwl_fw_error_dump_file *dump_file;
struct scatterlist *sg_dump_data;
u32 file_len;
u32 dump_mask = fwrt->fw->dbg.dump_mask;
- IWL_DEBUG_INFO(fwrt, "WRT dump start\n");
-
- /* there's no point in fw dump if the bus is dead */
- if (test_bit(STATUS_TRANS_DEAD, &fwrt->trans->status)) {
- IWL_ERR(fwrt, "Skip fw error dump since bus is dead\n");
- goto out;
- }
-
- fw_error_dump = kzalloc(sizeof(*fw_error_dump), GFP_KERNEL);
- if (!fw_error_dump)
- goto out;
-
- if (fwrt->trans->ini_valid)
- dump_file = _iwl_fw_error_ini_dump(fwrt, fw_error_dump);
- else
- dump_file = _iwl_fw_error_dump(fwrt, fw_error_dump);
-
- if (!dump_file) {
- kfree(fw_error_dump);
+ dump_file = iwl_fw_error_dump_file(fwrt, &fw_error_dump);
+ if (!dump_file)
goto out;
- }
if (!fwrt->trans->ini_valid && fwrt->dump.monitor_only)
dump_mask &= IWL_FW_ERROR_DUMP_FW_MONITOR;
- if (!fwrt->trans->ini_valid)
- fw_error_dump->trans_ptr =
- iwl_trans_dump_data(fwrt->trans, dump_mask);
-
+ fw_error_dump.trans_ptr = iwl_trans_dump_data(fwrt->trans, dump_mask);
file_len = le32_to_cpu(dump_file->file_len);
- fw_error_dump->fwrt_len = file_len;
- if (fw_error_dump->trans_ptr) {
- file_len += fw_error_dump->trans_ptr->len;
+ fw_error_dump.fwrt_len = file_len;
+
+ if (fw_error_dump.trans_ptr) {
+ file_len += fw_error_dump.trans_ptr->len;
dump_file->file_len = cpu_to_le32(file_len);
}
if (sg_dump_data) {
sg_pcopy_from_buffer(sg_dump_data,
sg_nents(sg_dump_data),
- fw_error_dump->fwrt_ptr,
- fw_error_dump->fwrt_len, 0);
- if (fw_error_dump->trans_ptr)
+ fw_error_dump.fwrt_ptr,
+ fw_error_dump.fwrt_len, 0);
+ if (fw_error_dump.trans_ptr)
sg_pcopy_from_buffer(sg_dump_data,
sg_nents(sg_dump_data),
- fw_error_dump->trans_ptr->data,
- fw_error_dump->trans_ptr->len,
- fw_error_dump->fwrt_len);
+ fw_error_dump.trans_ptr->data,
+ fw_error_dump.trans_ptr->len,
+ fw_error_dump.fwrt_len);
dev_coredumpsg(fwrt->trans->dev, sg_dump_data, file_len,
GFP_KERNEL);
}
- vfree(fw_error_dump->fwrt_ptr);
- vfree(fw_error_dump->trans_ptr);
- kfree(fw_error_dump);
+ vfree(fw_error_dump.fwrt_ptr);
+ vfree(fw_error_dump.trans_ptr);
out:
iwl_fw_free_dump_desc(fwrt);
clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
- IWL_DEBUG_INFO(fwrt, "WRT dump done\n");
}
-IWL_EXPORT_SYMBOL(iwl_fw_error_dump);
+
+static void iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt)
+{
+ struct iwl_fw_error_dump_file *dump_file;
+ struct scatterlist *sg_dump_data;
+ u32 file_len;
+
+ dump_file = iwl_fw_error_ini_dump_file(fwrt);
+ if (!dump_file)
+ goto out;
+
+ file_len = le32_to_cpu(dump_file->file_len);
+
+ sg_dump_data = alloc_sgtable(file_len);
+ if (sg_dump_data) {
+ sg_pcopy_from_buffer(sg_dump_data, sg_nents(sg_dump_data),
+ dump_file, file_len, 0);
+ dev_coredumpsg(fwrt->trans->dev, sg_dump_data, file_len,
+ GFP_KERNEL);
+ }
+ vfree(dump_file);
+out:
+ fwrt->dump.ini_trig_id = IWL_FW_TRIGGER_ID_INVALID;
+ clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
+}
const struct iwl_fw_dump_desc iwl_dump_desc_assert = {
.trig_desc = {
bool monitor_only,
unsigned int delay)
{
+ u32 trig_type = le32_to_cpu(desc->trig_desc.type);
+ int ret;
+
+ if (fwrt->trans->ini_valid) {
+ ret = iwl_fw_dbg_ini_collect(fwrt, trig_type);
+ if (!ret)
+ iwl_fw_free_dump_desc(fwrt);
+
+ return ret;
+ }
+
if (test_and_set_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status))
return -EBUSY;
iwl_dump_error_desc->len = 0;
ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc, false, 0);
- if (ret) {
+ if (ret)
kfree(iwl_dump_error_desc);
- } else {
- set_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status);
-
- /* trigger nmi to halt the fw */
- iwl_force_nmi(fwrt->trans);
- }
+ else
+ iwl_trans_sync_nmi(fwrt->trans);
return ret;
}
IWL_EXPORT_SYMBOL(iwl_fw_dbg_error_collect);
-int _iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
- enum iwl_fw_dbg_trigger trig,
- const char *str, size_t len,
- struct iwl_fw_dbg_trigger_tlv *trigger)
+int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_dbg_trigger trig,
+ const char *str, size_t len,
+ struct iwl_fw_dbg_trigger_tlv *trigger)
{
struct iwl_fw_dump_desc *desc;
unsigned int delay = 0;
return iwl_fw_dbg_collect_desc(fwrt, desc, monitor_only, delay);
}
-IWL_EXPORT_SYMBOL(_iwl_fw_dbg_collect);
+IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect);
-int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
- u32 id, const char *str, size_t len)
+int _iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_ini_trigger_id id)
{
- struct iwl_fw_dump_desc *desc;
struct iwl_fw_ini_active_triggers *active;
u32 occur, delay;
- if (!fwrt->trans->ini_valid)
- return _iwl_fw_dbg_collect(fwrt, id, str, len, NULL);
+ if (WARN_ON(!iwl_fw_ini_trigger_on(fwrt, id)))
+ return -EINVAL;
- if (id == FW_DBG_TRIGGER_USER)
- id = IWL_FW_TRIGGER_ID_USER_TRIGGER;
+ if (test_and_set_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status))
+ return -EBUSY;
active = &fwrt->dump.active_trigs[id];
-
- if (WARN_ON(!active->active))
- return -EINVAL;
-
delay = le32_to_cpu(active->trig->dump_delay);
occur = le32_to_cpu(active->trig->occurrences);
if (!occur)
return 0;
+ active->trig->occurrences = cpu_to_le32(--occur);
+
if (le32_to_cpu(active->trig->force_restart)) {
IWL_WARN(fwrt, "Force restart: trigger %d fired.\n", id);
iwl_force_nmi(fwrt->trans);
return 0;
}
- desc = kzalloc(sizeof(*desc) + len, GFP_ATOMIC);
- if (!desc)
- return -ENOMEM;
+ fwrt->dump.ini_trig_id = id;
- active->trig->occurrences = cpu_to_le32(--occur);
+ IWL_WARN(fwrt, "Collecting data: ini trigger %d fired.\n", id);
- desc->len = len;
- desc->trig_desc.type = cpu_to_le32(id);
- memcpy(desc->trig_desc.data, str, len);
+ schedule_delayed_work(&fwrt->dump.wk, usecs_to_jiffies(delay));
- return iwl_fw_dbg_collect_desc(fwrt, desc, true, delay);
+ return 0;
}
-IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect);
+IWL_EXPORT_SYMBOL(_iwl_fw_dbg_ini_collect);
+
+int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt, u32 legacy_trigger_id)
+{
+ int id;
+
+ switch (legacy_trigger_id) {
+ case FW_DBG_TRIGGER_FW_ASSERT:
+ case FW_DBG_TRIGGER_ALIVE_TIMEOUT:
+ case FW_DBG_TRIGGER_DRIVER:
+ id = IWL_FW_TRIGGER_ID_FW_ASSERT;
+ break;
+ case FW_DBG_TRIGGER_USER:
+ id = IWL_FW_TRIGGER_ID_USER_TRIGGER;
+ break;
+ default:
+ return -EIO;
+ }
+
+ return _iwl_fw_dbg_ini_collect(fwrt, id);
+}
+IWL_EXPORT_SYMBOL(iwl_fw_dbg_ini_collect);
int iwl_fw_dbg_collect_trig(struct iwl_fw_runtime *fwrt,
struct iwl_fw_dbg_trigger_tlv *trigger,
len = strlen(buf) + 1;
}
- ret = _iwl_fw_dbg_collect(fwrt, le32_to_cpu(trigger->id), buf, len,
- trigger);
+ ret = iwl_fw_dbg_collect(fwrt, le32_to_cpu(trigger->id), buf, len,
+ trigger);
if (ret)
return ret;
return;
}
+ /* there's no point in fw dump if the bus is dead */
+ if (test_bit(STATUS_TRANS_DEAD, &fwrt->trans->status)) {
+ IWL_ERR(fwrt, "Skip fw error dump since bus is dead\n");
+ return;
+ }
+
iwl_fw_dbg_stop_recording(fwrt, ¶ms);
- iwl_fw_error_dump(fwrt);
+ IWL_DEBUG_INFO(fwrt, "WRT dump start\n");
+ if (fwrt->trans->ini_valid)
+ iwl_fw_error_ini_dump(fwrt);
+ else
+ iwl_fw_error_dump(fwrt);
+ IWL_DEBUG_INFO(fwrt, "WRT dump done\n");
/* start recording again if the firmware is not crashed */
if (!test_bit(STATUS_FW_ERROR, &fwrt->trans->status) &&
.data = { data->data, },
};
+ /* currently the driver supports always on domain only */
+ if (le32_to_cpu(hcmd_tlv->domain) != IWL_FW_INI_DBG_DOMAIN_ALWAYS_ON)
+ return;
+
iwl_trans_send_cmd(fwrt->trans, &hcmd);
}
void iwl_fwrt_stop_device(struct iwl_fw_runtime *fwrt)
{
- /* if the wait event timeout elapses instead of wake up then
- * the driver did not receive NMI interrupt and can not assume the FW
- * is halted
- */
- int ret = wait_event_timeout(fwrt->trans->fw_halt_waitq,
- !test_bit(STATUS_FW_WAIT_DUMP,
- &fwrt->trans->status),
- msecs_to_jiffies(2000));
- if (!ret) {
- /* failed to receive NMI interrupt, assuming the FW is stuck */
- set_bit(STATUS_FW_ERROR, &fwrt->trans->status);
-
- clear_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status);
- }
-
- /* Assuming the op mode mutex is held at this point */
iwl_fw_dbg_collect_sync(fwrt);
iwl_trans_stop_device(fwrt->trans);
fwrt->dump.umac_err_id = 0;
}
-void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt);
int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
const struct iwl_fw_dump_desc *desc,
bool monitor_only, unsigned int delay);
int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt,
enum iwl_fw_dbg_trigger trig_type);
-int _iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
- enum iwl_fw_dbg_trigger trig,
- const char *str, size_t len,
- struct iwl_fw_dbg_trigger_tlv *trigger);
+int _iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_ini_trigger_id id);
+int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt, u32 legacy_trigger_id);
int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
- u32 id, const char *str, size_t len);
+ enum iwl_fw_dbg_trigger trig, const char *str,
+ size_t len, struct iwl_fw_dbg_trigger_tlv *trigger);
int iwl_fw_dbg_collect_trig(struct iwl_fw_runtime *fwrt,
struct iwl_fw_dbg_trigger_tlv *trigger,
const char *fmt, ...) __printf(3, 4);
struct iwl_fw_ini_trigger *trig;
u32 usec;
-
-
- if (!fwrt->trans->ini_valid || id >= IWL_FW_TRIGGER_ID_NUM ||
- !fwrt->dump.active_trigs[id].active)
+ if (!fwrt->trans->ini_valid || id == IWL_FW_TRIGGER_ID_INVALID ||
+ id >= IWL_FW_TRIGGER_ID_NUM || !fwrt->dump.active_trigs[id].active)
return false;
trig = fwrt->dump.active_trigs[id].trig;
/* This bit is used to differentiate the legacy dump from the ini dump */
#define INI_DUMP_BIT BIT(31)
+static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt)
+{
+ if (fwrt->trans->ini_valid && fwrt->trans->hw_error) {
+ _iwl_fw_dbg_ini_collect(fwrt, IWL_FW_TRIGGER_ID_FW_HW_ERROR);
+ fwrt->trans->hw_error = false;
+ } else {
+ iwl_fw_dbg_collect_desc(fwrt, &iwl_dump_desc_assert, false, 0);
+ }
+}
+
#endif /* __iwl_fw_dbg_h__ */
* @fw_mon_wr_ptr: the position of the write pointer in the cyclic buffer
* @fw_mon_base_ptr: base pointer of the data
* @fw_mon_cycle_cnt: number of wraparounds
+ * @fw_mon_base_high_ptr: used in AX210 devices, the base address is 64 bit
+ * so fw_mon_base_ptr holds LSB 32 bits and fw_mon_base_high_ptr holds
+ * MSB 32 bits
* @reserved: for future use
* @data: captured data
*/
__le32 fw_mon_wr_ptr;
__le32 fw_mon_base_ptr;
__le32 fw_mon_cycle_cnt;
- __le32 reserved[3];
+ __le32 fw_mon_base_high_ptr;
+ __le32 reserved[2];
u8 data[];
} __packed;
u8 data[];
};
+#define IWL_INI_DUMP_MEM_VER 1
+#define IWL_INI_DUMP_MONITOR_VER 1
+#define IWL_INI_DUMP_FIFO_VER 1
+
/**
* struct iwl_fw_ini_error_dump_range - range of memory
- * @start_addr: the start address of this range
* @range_data_size: the size of this range, in bytes
+ * @start_addr: the start address of this range
* @data: the actual memory
*/
struct iwl_fw_ini_error_dump_range {
- __le32 start_addr;
__le32 range_data_size;
+ __le64 start_addr;
__le32 data[];
} __packed;
/**
* struct iwl_fw_ini_error_dump_header - ini region dump header
+ * @version: dump version
+ * @region_id: id of the region
* @num_of_ranges: number of ranges in this region
* @name_len: number of bytes allocated to the name string of this region
* @name: name of the region
*/
struct iwl_fw_ini_error_dump_header {
+ __le32 version;
+ __le32 region_id;
__le32 num_of_ranges;
__le32 name_len;
u8 name[IWL_FW_INI_MAX_NAME];
/* This bit is used to differentiate between lmac and umac rxf */
#define IWL_RXF_UMAC_BIT BIT(31)
+/**
+ * struct iwl_fw_ini_error_dump_register - ini register dump
+ * @addr: address of the register
+ * @data: data of the register
+ */
+struct iwl_fw_ini_error_dump_register {
+ __le32 addr;
+ __le32 data;
+} __packed;
+
/**
* struct iwl_fw_ini_fifo_error_dump_range - ini fifo range dump
* @fifo_num: the fifo num. In case of rxf and umac rxf, set BIT(31) to
* distinguish between lmac and umac
* @num_of_registers: num of registers to dump, dword size each
- * @range_data_size: the size of the registers and fifo data
- * @data: fifo data
+ * @range_data_size: the size of the data
+ * @data: consists of
+ * num_of_registers * (register address + register value) + fifo data
*/
struct iwl_fw_ini_fifo_error_dump_range {
__le32 fifo_num;
};
/**
- * struct iwl_fw_ini_monitor_dram_dump - ini dram monitor dump
+ * struct iwl_fw_ini_monitor_dump - ini monitor dump
* @header - header of the region
- * @write_ptr - write pointer position in the dram
+ * @write_ptr - write pointer position in the buffer
* @cycle_cnt - cycles count
* @ranges - the memory ranges of this this region
*/
-struct iwl_fw_ini_monitor_dram_dump {
+struct iwl_fw_ini_monitor_dump {
struct iwl_fw_ini_error_dump_header header;
__le32 write_ptr;
__le32 cycle_cnt;
* version of the beacon notification.
* @IWL_UCODE_TLV_API_BEACON_FILTER_V4: This ucode supports v4 of
* BEACON_FILTER_CONFIG_API_S_VER_4.
+ * @IWL_UCODE_TLV_API_REGULATORY_NVM_INFO: This ucode supports v4 of
+ * REGULATORY_NVM_GET_INFO_RSP_API_S.
* @IWL_UCODE_TLV_API_FTM_NEW_RANGE_REQ: This ucode supports v7 of
* LOCATION_RANGE_REQ_CMD_API_S and v6 of LOCATION_RANGE_RESP_NTFY_API_S.
+ * @IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS: This ucode supports v2 of
+ * SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S and v3 of
+ * SCAN_OFFLOAD_PROFILES_QUERY_RSP_S.
+ * @IWL_UCODE_TLV_API_MBSSID_HE: This ucode supports v2 of
+ * STA_CONTEXT_DOT11AX_API_S
*
* @NUM_IWL_UCODE_TLV_API: number of bits used
*/
IWL_UCODE_TLV_API_REDUCE_TX_POWER = (__force iwl_ucode_tlv_api_t)45,
IWL_UCODE_TLV_API_SHORT_BEACON_NOTIF = (__force iwl_ucode_tlv_api_t)46,
IWL_UCODE_TLV_API_BEACON_FILTER_V4 = (__force iwl_ucode_tlv_api_t)47,
+ IWL_UCODE_TLV_API_REGULATORY_NVM_INFO = (__force iwl_ucode_tlv_api_t)48,
IWL_UCODE_TLV_API_FTM_NEW_RANGE_REQ = (__force iwl_ucode_tlv_api_t)49,
+ IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS = (__force iwl_ucode_tlv_api_t)50,
+ IWL_UCODE_TLV_API_MBSSID_HE = (__force iwl_ucode_tlv_api_t)52,
NUM_IWL_UCODE_TLV_API
#ifdef __CHECKER__
* IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD: firmware supports CSA command
* @IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS: firmware supports ultra high band
* (6 GHz).
+ * @IWL_UCODE_TLV_CAPA_CS_MODIFY: firmware supports modify action CSA command
* @IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE: extended DTS measurement
* @IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS: supports short PM timeouts
* @IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT
IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD = (__force iwl_ucode_tlv_capa_t)46,
IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS = (__force iwl_ucode_tlv_capa_t)48,
IWL_UCODE_TLV_CAPA_FTM_CALIBRATED = (__force iwl_ucode_tlv_capa_t)47,
+ IWL_UCODE_TLV_CAPA_CS_MODIFY = (__force iwl_ucode_tlv_capa_t)49,
/* set 2 */
IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE = (__force iwl_ucode_tlv_capa_t)64,
fwrt->ops_ctx = ops_ctx;
INIT_DELAYED_WORK(&fwrt->dump.wk, iwl_fw_error_dump_wk);
iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir);
- init_waitqueue_head(&fwrt->trans->fw_halt_waitq);
}
IWL_EXPORT_SYMBOL(iwl_fw_runtime_init);
u32 lmac_err_id[MAX_NUM_LMAC];
u32 umac_err_id;
void *fifo_iter;
+ enum iwl_fw_ini_trigger_id ini_trig_id;
} dump;
#ifdef CONFIG_IWLWIFI_DEBUGFS
struct {
* @bisr_workaround: BISR hardware workaround (for 22260 series devices)
* @min_txq_size: minimum number of slots required in a TX queue
* @umac_prph_offset: offset to add to UMAC periphery address
+ * @uhb_supported: ultra high band channels supported
+ * @min_256_ba_txq_size: minimum number of slots required in a TX queue which
+ * supports 256 BA aggregation
*
* We enable the driver to be backward compatible wrt. hardware features.
* API differences in uCode shouldn't be handled here but through TLVs
gen2:1,
cdb:1,
dbgc_supported:1,
- bisr_workaround:1;
+ bisr_workaround:1,
+ uhb_supported:1;
u8 valid_tx_ant;
u8 valid_rx_ant;
u8 non_shared_ant;
u32 d3_debug_data_length;
u32 min_txq_size;
u32 umac_prph_offset;
+ u32 fw_mon_smem_write_ptr_addr;
+ u32 fw_mon_smem_write_ptr_msk;
+ u32 fw_mon_smem_cycle_cnt_ptr_addr;
+ u32 fw_mon_smem_cycle_cnt_ptr_msk;
+ u32 gp2_reg_addr;
+ u32 min_256_ba_txq_size;
};
extern const struct iwl_csr_params iwl_csr_v1;
extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb;
extern const struct iwl_cfg iwl22000_2ac_cfg_jf;
extern const struct iwl_cfg iwl_ax101_cfg_qu_hr;
+extern const struct iwl_cfg iwl_ax101_cfg_quz_hr;
extern const struct iwl_cfg iwl22000_2ax_cfg_hr;
-extern const struct iwl_cfg iwl22260_2ax_cfg;
+extern const struct iwl_cfg iwl_ax200_cfg_cc;
extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0;
extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0;
extern const struct iwl_cfg killer1650x_2ax_cfg;
extern const struct iwl_cfg iwlax210_2ax_cfg_so_hr_a0;
extern const struct iwl_cfg iwlax210_2ax_cfg_so_gf_a0;
extern const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0;
+extern const struct iwl_cfg iwlax210_2ax_cfg_so_gf4_a0;
#endif /* CPTCFG_IWLMVM || CPTCFG_IWLFMAC */
#endif /* __IWL_CONFIG_H__ */
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
*
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#define CSR_HW_REV_TYPE_NONE (0x00001F0)
#define CSR_HW_REV_TYPE_QNJ (0x0000360)
#define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364)
+#define CSR_HW_REV_TYPE_QUZ (0x0000354)
#define CSR_HW_REV_TYPE_HR_CDB (0x0000340)
#define CSR_HW_REV_TYPE_SO (0x0000370)
#define CSR_HW_REV_TYPE_TY (0x0000420)
#define CSR_HW_RF_ID_TYPE_HR (0x0010A000)
#define CSR_HW_RF_ID_TYPE_HRCDB (0x00109F00)
#define CSR_HW_RF_ID_TYPE_GF (0x0010D000)
+#define CSR_HW_RF_ID_TYPE_GF4 (0x0010E000)
/* HW_RF CHIP ID */
#define CSR_HW_RF_ID_TYPE_CHIP_ID(_val) (((_val) >> 12) & 0xFFF)
*
* GPL LICENSE SUMMARY
*
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
*
* BSD LICENSE
*
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
int copy_size = le32_to_cpu(tlv->length) + sizeof(*tlv);
int offset_size = copy_size;
+ if (le32_to_cpu(header->tlv_version) != 1)
+ return;
+
if (WARN_ONCE(apply_point >= IWL_FW_INI_APPLY_NUM,
"Invalid apply point id %d\n", apply_point))
return;
hdr = (void *)&tlv->data[0];
apply = le32_to_cpu(hdr->apply_point);
+ if (le32_to_cpu(hdr->tlv_version) != 1)
+ continue;
+
IWL_DEBUG_FW(trans, "Read TLV %x, apply point %d\n",
le32_to_cpu(tlv->type), apply);
/*
* These are the channel numbers in the order that they are stored in the NVM
*/
-static const u8 iwl_nvm_channels[] = {
+static const u16 iwl_nvm_channels[] = {
/* 2.4 GHz */
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/* 5 GHz */
149, 153, 157, 161, 165
};
-static const u8 iwl_ext_nvm_channels[] = {
+static const u16 iwl_ext_nvm_channels[] = {
/* 2.4 GHz */
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/* 5 GHz */
149, 153, 157, 161, 165, 169, 173, 177, 181
};
+/*
+ * Channel table used when cfg->uhb_supported is set.  Same 2.4 GHz entries
+ * as the other tables (the first NUM_2GHZ_CHANNELS entries), followed by
+ * 5 GHz channels and then the 6-7 GHz channel numbers, which exceed 255 and
+ * therefore need u16 storage.
+ */
+static const u16 iwl_uhb_nvm_channels[] = {
+ /* 2.4 GHz */
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ /* 5 GHz */
+ 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92,
+ 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
+ 149, 153, 157, 161, 165, 169, 173, 177, 181,
+ /* 6-7 GHz */
+ 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 233, 237, 241,
+ 245, 249, 253, 257, 261, 265, 269, 273, 277, 281, 285, 289, 293, 297,
+ 301, 305, 309, 313, 317, 321, 325, 329, 333, 337, 341, 345, 349, 353,
+ 357, 361, 365, 369, 373, 377, 381, 385, 389, 393, 397, 401, 405, 409,
+ 413, 417, 421
+};
+
#define IWL_NVM_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels)
#define IWL_NVM_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels)
+#define IWL_NVM_NUM_CHANNELS_UHB ARRAY_SIZE(iwl_uhb_nvm_channels)
#define NUM_2GHZ_CHANNELS 14
-#define NUM_2GHZ_CHANNELS_EXT 14
#define FIRST_2GHZ_HT_MINUS 5
#define LAST_2GHZ_HT_PLUS 9
-#define LAST_5GHZ_HT 165
-#define LAST_5GHZ_HT_FAMILY_8000 181
#define N_HW_ADDR_MASK 0xF
/* rate data (static) */
};
static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level,
- int chan, u16 flags)
+ int chan, u32 flags)
{
#define CHECK_AND_PRINT_I(x) \
((flags & NVM_CHANNEL_##x) ? " " #x : "")
}
static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz,
- u16 nvm_flags, const struct iwl_cfg *cfg)
+ u32 nvm_flags, const struct iwl_cfg *cfg)
{
u32 flags = IEEE80211_CHAN_NO_HT40;
- u32 last_5ghz_ht = LAST_5GHZ_HT;
-
- if (cfg->nvm_type == IWL_NVM_EXT)
- last_5ghz_ht = LAST_5GHZ_HT_FAMILY_8000;
if (!is_5ghz && (nvm_flags & NVM_CHANNEL_40MHZ)) {
if (ch_num <= LAST_2GHZ_HT_PLUS)
flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
if (ch_num >= FIRST_2GHZ_HT_MINUS)
flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
- } else if (ch_num <= last_5ghz_ht && (nvm_flags & NVM_CHANNEL_40MHZ)) {
+ } else if (nvm_flags & NVM_CHANNEL_40MHZ) {
if ((ch_idx - NUM_2GHZ_CHANNELS) % 2 == 0)
flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
else
static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
struct iwl_nvm_data *data,
- const __le16 * const nvm_ch_flags,
- u32 sbands_flags)
+ const void * const nvm_ch_flags,
+ u32 sbands_flags, bool v4)
{
int ch_idx;
int n_channels = 0;
struct ieee80211_channel *channel;
- u16 ch_flags;
- int num_of_ch, num_2ghz_channels;
- const u8 *nvm_chan;
-
- if (cfg->nvm_type != IWL_NVM_EXT) {
- num_of_ch = IWL_NVM_NUM_CHANNELS;
- nvm_chan = &iwl_nvm_channels[0];
- num_2ghz_channels = NUM_2GHZ_CHANNELS;
- } else {
+ u32 ch_flags;
+ int num_of_ch, num_2ghz_channels = NUM_2GHZ_CHANNELS;
+ const u16 *nvm_chan;
+
+ if (cfg->uhb_supported) {
+ num_of_ch = IWL_NVM_NUM_CHANNELS_UHB;
+ nvm_chan = iwl_uhb_nvm_channels;
+ } else if (cfg->nvm_type == IWL_NVM_EXT) {
num_of_ch = IWL_NVM_NUM_CHANNELS_EXT;
- nvm_chan = &iwl_ext_nvm_channels[0];
- num_2ghz_channels = NUM_2GHZ_CHANNELS_EXT;
+ nvm_chan = iwl_ext_nvm_channels;
+ } else {
+ num_of_ch = IWL_NVM_NUM_CHANNELS;
+ nvm_chan = iwl_nvm_channels;
}
for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
bool is_5ghz = (ch_idx >= num_2ghz_channels);
- ch_flags = __le16_to_cpup(nvm_ch_flags + ch_idx);
+ if (v4)
+ ch_flags =
+ __le32_to_cpup((__le32 *)nvm_ch_flags + ch_idx);
+ else
+ ch_flags =
+ __le16_to_cpup((__le16 *)nvm_ch_flags + ch_idx);
if (is_5ghz && !data->sku_cap_band_52ghz_enable)
continue;
static void iwl_init_he_hw_capab(struct ieee80211_supported_band *sband,
u8 tx_chains, u8 rx_chains)
{
- if (sband->band == NL80211_BAND_2GHZ ||
- sband->band == NL80211_BAND_5GHZ)
- sband->iftype_data = iwl_he_capa;
- else
- return;
-
+ sband->iftype_data = iwl_he_capa;
sband->n_iftype_data = ARRAY_SIZE(iwl_he_capa);
/* If not 2x2, we need to indicate 1x1 in the Midamble RX Max NSTS */
static void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg,
struct iwl_nvm_data *data,
- const __le16 *nvm_ch_flags, u8 tx_chains,
- u8 rx_chains, u32 sbands_flags)
+ const void *nvm_ch_flags, u8 tx_chains,
+ u8 rx_chains, u32 sbands_flags, bool v4)
{
int n_channels;
int n_used = 0;
struct ieee80211_supported_band *sband;
n_channels = iwl_init_channel_map(dev, cfg, data, nvm_ch_flags,
- sbands_flags);
+ sbands_flags, v4);
sband = &data->bands[NL80211_BAND_2GHZ];
sband->band = NL80211_BAND_2GHZ;
sband->bitrates = &iwl_cfg80211_rates[RATES_24_OFFS];
sbands_flags |= IWL_NVM_SBANDS_FLAGS_NO_WIDE_IN_5GHZ;
iwl_init_sbands(dev, cfg, data, ch_section, tx_chains, rx_chains,
- sbands_flags);
+ sbands_flags, false);
data->calib_version = 255;
return data;
}
IWL_EXPORT_SYMBOL(iwl_parse_nvm_data);
-static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan,
+static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan,
int ch_idx, u16 nvm_flags,
const struct iwl_cfg *cfg)
{
u32 flags = NL80211_RRF_NO_HT40;
- u32 last_5ghz_ht = LAST_5GHZ_HT;
-
- if (cfg->nvm_type == IWL_NVM_EXT)
- last_5ghz_ht = LAST_5GHZ_HT_FAMILY_8000;
if (ch_idx < NUM_2GHZ_CHANNELS &&
(nvm_flags & NVM_CHANNEL_40MHZ)) {
flags &= ~NL80211_RRF_NO_HT40PLUS;
if (nvm_chan[ch_idx] >= FIRST_2GHZ_HT_MINUS)
flags &= ~NL80211_RRF_NO_HT40MINUS;
- } else if (nvm_chan[ch_idx] <= last_5ghz_ht &&
- (nvm_flags & NVM_CHANNEL_40MHZ)) {
+ } else if (nvm_flags & NVM_CHANNEL_40MHZ) {
if ((ch_idx - NUM_2GHZ_CHANNELS) % 2 == 0)
flags &= ~NL80211_RRF_NO_HT40PLUS;
else
int ch_idx;
u16 ch_flags;
u32 reg_rule_flags, prev_reg_rule_flags = 0;
- const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ?
- iwl_ext_nvm_channels : iwl_nvm_channels;
+ const u16 *nvm_chan;
struct ieee80211_regdomain *regd, *copy_rd;
- int size_of_regd, regd_to_copy;
struct ieee80211_reg_rule *rule;
struct regdb_ptrs *regdb_ptrs;
enum nl80211_band band;
int center_freq, prev_center_freq = 0;
int valid_rules = 0;
bool new_rule;
- int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ?
- IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS;
+ int max_num_ch;
+
+ if (cfg->uhb_supported) {
+ max_num_ch = IWL_NVM_NUM_CHANNELS_UHB;
+ nvm_chan = iwl_uhb_nvm_channels;
+ } else if (cfg->nvm_type == IWL_NVM_EXT) {
+ max_num_ch = IWL_NVM_NUM_CHANNELS_EXT;
+ nvm_chan = iwl_ext_nvm_channels;
+ } else {
+ max_num_ch = IWL_NVM_NUM_CHANNELS;
+ nvm_chan = iwl_nvm_channels;
+ }
if (WARN_ON(num_of_ch > max_num_ch))
num_of_ch = max_num_ch;
num_of_ch);
/* build a regdomain rule for every valid channel */
- size_of_regd =
- sizeof(struct ieee80211_regdomain) +
- num_of_ch * sizeof(struct ieee80211_reg_rule);
-
- regd = kzalloc(size_of_regd, GFP_KERNEL);
+ regd = kzalloc(struct_size(regd, reg_rules, num_of_ch), GFP_KERNEL);
if (!regd)
return ERR_PTR(-ENOMEM);
* Narrow down regdom for unused regulatory rules to prevent hole
* between reg rules to wmm rules.
*/
- regd_to_copy = sizeof(struct ieee80211_regdomain) +
- valid_rules * sizeof(struct ieee80211_reg_rule);
-
- copy_rd = kmemdup(regd, regd_to_copy, GFP_KERNEL);
- if (!copy_rd) {
+ copy_rd = kmemdup(regd, struct_size(regd, reg_rules, valid_rules),
+ GFP_KERNEL);
+ if (!copy_rd)
copy_rd = ERR_PTR(-ENOMEM);
- goto out;
- }
out:
kfree(regdb_ptrs);
const struct iwl_fw *fw)
{
struct iwl_nvm_get_info cmd = {};
- struct iwl_nvm_get_info_rsp *rsp;
struct iwl_nvm_data *nvm;
struct iwl_host_cmd hcmd = {
.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
bool empty_otp;
u32 mac_flags;
u32 sbands_flags = 0;
+ /*
+ * All the values in iwl_nvm_get_info_rsp v4 are the same as
+ * in v3, except for the channel profile part of the
+ * regulatory. So we can just access the new struct, with the
+ * exception of the latter.
+ */
+ struct iwl_nvm_get_info_rsp *rsp;
+ struct iwl_nvm_get_info_rsp_v3 *rsp_v3;
+ bool v4 = fw_has_api(&fw->ucode_capa,
+ IWL_UCODE_TLV_API_REGULATORY_NVM_INFO);
+ size_t rsp_size = v4 ? sizeof(*rsp) : sizeof(*rsp_v3);
+ void *channel_profile;
ret = iwl_trans_send_cmd(trans, &hcmd);
if (ret)
return ERR_PTR(ret);
- if (WARN(iwl_rx_packet_payload_len(hcmd.resp_pkt) != sizeof(*rsp),
+ if (WARN(iwl_rx_packet_payload_len(hcmd.resp_pkt) != rsp_size,
"Invalid payload len in NVM response from FW %d",
iwl_rx_packet_payload_len(hcmd.resp_pkt))) {
ret = -EINVAL;
sbands_flags |= IWL_NVM_SBANDS_FLAGS_LAR;
}
+ rsp_v3 = (void *)rsp;
+ channel_profile = v4 ? (void *)rsp->regulatory.channel_profile :
+ (void *)rsp_v3->regulatory.channel_profile;
+
+ /*
+ * Pass the profile selected above: for a v3 response the channel
+ * profile must be taken from the v3 layout, not from the v4 struct.
+ */
iwl_init_sbands(trans->dev, trans->cfg, nvm,
- rsp->regulatory.channel_profile,
+ channel_profile,
nvm->valid_tx_ant & fw->valid_tx_ant,
nvm->valid_rx_ant & fw->valid_rx_ant,
- sbands_flags);
+ sbands_flags, v4);
iwl_free_resp(&hcmd);
return nvm;
#define MON_BUFF_WRPTR_VER2 (0xa03c24)
#define MON_BUFF_CYCLE_CNT_VER2 (0xa03c28)
#define MON_BUFF_SHIFT_VER2 (0x8)
+/* FW monitor family AX210 and on */
+#define DBGC_CUR_DBGBUF_BASE_ADDR_LSB (0xd03c20)
+#define DBGC_CUR_DBGBUF_BASE_ADDR_MSB (0xd03c24)
+#define DBGC_CUR_DBGBUF_STATUS (0xd03c1c)
+#define DBGC_DBGBUF_WRAP_AROUND (0xd03c2c)
+#define DBGC_CUR_DBGBUF_STATUS_OFFSET_MSK (0x00ffffff)
#define MON_DMARB_RD_CTL_ADDR (0xa03c60)
#define MON_DMARB_RD_DATA_ADDR (0xa03c5c)
bool _page_stolen;
u32 _rx_page_order;
unsigned int truesize;
- u8 status;
};
static inline void *rxb_addr(struct iwl_rx_cmd_buffer *r)
* are sent
* @STATUS_TRANS_IDLE: the trans is idle - general commands are not to be sent
* @STATUS_TRANS_DEAD: trans is dead - avoid any read/write operation
- * @STATUS_FW_WAIT_DUMP: if set, wait until cleared before collecting dump
*/
enum iwl_trans_status {
STATUS_SYNC_HCMD_ACTIVE,
STATUS_TRANS_GOING_IDLE,
STATUS_TRANS_IDLE,
STATUS_TRANS_DEAD,
- STATUS_FW_WAIT_DUMP,
};
static inline int
struct iwl_trans_dump_data *(*dump_data)(struct iwl_trans *trans,
u32 dump_mask);
void (*debugfs_cleanup)(struct iwl_trans *trans);
+ void (*sync_nmi)(struct iwl_trans *trans);
};
/**
* @umac_error_event_table: addr of umac error table
* @error_event_table_tlv_status: bitmap that indicates what error table
* pointers was recevied via TLV. use enum &iwl_error_event_table_status
+ * @hw_error: equals true if hw error interrupt was received from the FW
*/
struct iwl_trans {
const struct iwl_trans_ops *ops;
u32 lmac_error_event_table[2];
u32 umac_error_event_table;
unsigned int error_event_table_tlv_status;
- wait_queue_head_t fw_halt_waitq;
+ bool hw_error;
/* pointer to trans specific struct */
/*Ensure that this pointer will always be aligned to sizeof pointer */
/* prevent double restarts due to the same erroneous FW */
if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status))
iwl_op_mode_nic_error(trans->op_mode);
+}
- if (test_and_clear_bit(STATUS_FW_WAIT_DUMP, &trans->status))
- wake_up(&trans->fw_halt_waitq);
-
+/*
+ * Call the transport's sync_nmi op if the transport provides one;
+ * otherwise do nothing.
+ */
+static inline void iwl_trans_sync_nmi(struct iwl_trans *trans)
+{
+ if (trans->ops->sync_nmi)
+ trans->ops->sync_nmi(trans);
}
/*****************************************************
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
iwl_mvm_iter_d0i3_ap_keys(mvm, vif, iwl_mvm_d3_update_keys, >kdata);
}
+#define ND_QUERY_BUF_LEN (sizeof(struct iwl_scan_offload_profile_match) * \
+ IWL_SCAN_MAX_PROFILES)
+
struct iwl_mvm_nd_query_results {
u32 matched_profiles;
- struct iwl_scan_offload_profile_match matches[IWL_SCAN_MAX_PROFILES];
+ u8 matches[ND_QUERY_BUF_LEN];
};
static int
.flags = CMD_WANT_SKB,
};
int ret, len;
+ size_t query_len, matches_len;
ret = iwl_mvm_send_cmd(mvm, &cmd);
if (ret) {
return ret;
}
+ if (fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS)) {
+ query_len = sizeof(struct iwl_scan_offload_profiles_query);
+ matches_len = sizeof(struct iwl_scan_offload_profile_match) *
+ IWL_SCAN_MAX_PROFILES;
+ } else {
+ query_len = sizeof(struct iwl_scan_offload_profiles_query_v1);
+ matches_len = sizeof(struct iwl_scan_offload_profile_match_v1) *
+ IWL_SCAN_MAX_PROFILES;
+ }
+
len = iwl_rx_packet_payload_len(cmd.resp_pkt);
- if (len < sizeof(*query)) {
+ if (len < query_len) {
IWL_ERR(mvm, "Invalid scan offload profiles query response!\n");
ret = -EIO;
goto out_free_resp;
query = (void *)cmd.resp_pkt->data;
results->matched_profiles = le32_to_cpu(query->matched_profiles);
- memcpy(results->matches, query->matches, sizeof(results->matches));
+ memcpy(results->matches, query->matches, matches_len);
#ifdef CONFIG_IWLWIFI_DEBUGFS
mvm->last_netdetect_scans = le32_to_cpu(query->n_scans_done);
return ret;
}
+/*
+ * Count the channels flagged in the matching_channels bitmap of profile
+ * match @idx inside @query.
+ *
+ * query->matches is a raw byte buffer; it is interpreted with the current
+ * match layout when the firmware advertises
+ * IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS and with the _v1 layout otherwise.
+ *
+ * Returns the number of bits set in that bitmap.
+ */
+static int iwl_mvm_query_num_match_chans(struct iwl_mvm *mvm,
+ struct iwl_mvm_nd_query_results *query,
+ int idx)
+{
+ int n_chans = 0, i;
+
+ if (fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS)) {
+ struct iwl_scan_offload_profile_match *matches =
+ (struct iwl_scan_offload_profile_match *)query->matches;
+
+ /* one bit per channel: sum the popcount of every bitmap byte */
+ for (i = 0; i < SCAN_OFFLOAD_MATCHING_CHANNELS_LEN; i++)
+ n_chans += hweight8(matches[idx].matching_channels[i]);
+ } else {
+ struct iwl_scan_offload_profile_match_v1 *matches =
+ (struct iwl_scan_offload_profile_match_v1 *)query->matches;
+
+ for (i = 0; i < SCAN_OFFLOAD_MATCHING_CHANNELS_LEN_V1; i++)
+ n_chans += hweight8(matches[idx].matching_channels[i]);
+ }
+
+ return n_chans;
+}
+
+/*
+ * Fill @match->channels with the center frequency of every channel whose bit
+ * is set in the matching_channels bitmap of profile match @idx in @query.
+ *
+ * Bit i of the bitmap selects mvm->nd_channels[i]; match->n_channels is
+ * incremented for each frequency written.  As in the counting helper, the
+ * match layout (current vs. _v1) is selected by the
+ * IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS firmware API flag.
+ *
+ * NOTE(review): nothing here bounds the writes to match->channels or the
+ * reads from mvm->nd_channels - presumably the caller has sized and checked
+ * both; confirm against the call site.
+ */
+static void iwl_mvm_query_set_freqs(struct iwl_mvm *mvm,
+ struct iwl_mvm_nd_query_results *query,
+ struct cfg80211_wowlan_nd_match *match,
+ int idx)
+{
+ int i;
+
+ if (fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS)) {
+ struct iwl_scan_offload_profile_match *matches =
+ (struct iwl_scan_offload_profile_match *)query->matches;
+
+ /* walk the bitmap bit by bit: byte i / 8, bit i % 8 */
+ for (i = 0; i < SCAN_OFFLOAD_MATCHING_CHANNELS_LEN * 8; i++)
+ if (matches[idx].matching_channels[i / 8] & (BIT(i % 8)))
+ match->channels[match->n_channels++] =
+ mvm->nd_channels[i]->center_freq;
+ } else {
+ struct iwl_scan_offload_profile_match_v1 *matches =
+ (struct iwl_scan_offload_profile_match_v1 *)query->matches;
+
+ for (i = 0; i < SCAN_OFFLOAD_MATCHING_CHANNELS_LEN_V1 * 8; i++)
+ if (matches[idx].matching_channels[i / 8] & (BIT(i % 8)))
+ match->channels[match->n_channels++] =
+ mvm->nd_channels[i]->center_freq;
+ }
+}
+
static void iwl_mvm_query_netdetect_reasons(struct iwl_mvm *mvm,
struct ieee80211_vif *vif)
{
struct iwl_wowlan_status *fw_status;
unsigned long matched_profiles;
u32 reasons = 0;
- int i, j, n_matches, ret;
+ int i, n_matches, ret;
fw_status = iwl_mvm_get_wakeup_status(mvm);
if (!IS_ERR_OR_NULL(fw_status)) {
goto out_report_nd;
for_each_set_bit(i, &matched_profiles, mvm->n_nd_match_sets) {
- struct iwl_scan_offload_profile_match *fw_match;
struct cfg80211_wowlan_nd_match *match;
int idx, n_channels = 0;
- fw_match = &query.matches[i];
-
- for (j = 0; j < SCAN_OFFLOAD_MATCHING_CHANNELS_LEN; j++)
- n_channels += hweight8(fw_match->matching_channels[j]);
+ n_channels = iwl_mvm_query_num_match_chans(mvm, &query, i);
match = kzalloc(struct_size(match, channels, n_channels),
GFP_KERNEL);
if (mvm->n_nd_channels < n_channels)
continue;
- for (j = 0; j < SCAN_OFFLOAD_MATCHING_CHANNELS_LEN * 8; j++)
- if (fw_match->matching_channels[j / 8] & (BIT(j % 8)))
- match->channels[match->n_channels++] =
- mvm->nd_channels[j]->center_freq;
+ iwl_mvm_query_set_freqs(mvm, &query, match, i);
}
out_report_nd:
* 2. We are using a unified image but had an error while exiting D3
*/
set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status);
- set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status);
/*
* When switching images we return 1, which causes mac80211
* to do a reconfig with IEEE80211_RECONFIG_TYPE_RESTART.
#define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \
_MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz, struct ieee80211_vif)
#define MVM_DEBUGFS_ADD_FILE_VIF(name, parent, mode) do { \
- if (!debugfs_create_file(#name, mode, parent, vif, \
- &iwl_dbgfs_##name##_ops)) \
- goto err; \
+ debugfs_create_file(#name, mode, parent, vif, \
+ &iwl_dbgfs_##name##_ops); \
} while (0)
MVM_DEBUGFS_READ_FILE_OPS(mac_params);
mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir);
- if (!mvmvif->dbgfs_dir) {
- IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n",
- dbgfs_dir);
- return;
- }
-
if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) ||
(vif->type == NL80211_IFTYPE_STATION && vif->p2p)))
mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name,
mvm->debugfs_dir, buf);
- if (!mvmvif->dbgfs_slink)
- IWL_ERR(mvm, "Can't create debugfs symbolic link under %pd\n",
- dbgfs_dir);
- return;
-err:
- IWL_ERR(mvm, "Can't create debugfs entity\n");
}
void iwl_mvm_vif_dbgfs_clean(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
return 0;
iwl_fw_dbg_collect(&mvm->fwrt, FW_DBG_TRIGGER_USER, buf,
- (count - 1));
+ (count - 1), NULL);
iwl_mvm_unref(mvm, IWL_MVM_REF_PRPH_WRITE);
#define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \
_MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz, struct iwl_mvm)
#define MVM_DEBUGFS_ADD_FILE_ALIAS(alias, name, parent, mode) do { \
- if (!debugfs_create_file(alias, mode, parent, mvm, \
- &iwl_dbgfs_##name##_ops)) \
- goto err; \
+ debugfs_create_file(alias, mode, parent, mvm, \
+ &iwl_dbgfs_##name##_ops); \
} while (0)
#define MVM_DEBUGFS_ADD_FILE(name, parent, mode) \
MVM_DEBUGFS_ADD_FILE_ALIAS(#name, name, parent, mode)
_MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz, struct ieee80211_sta)
#define MVM_DEBUGFS_ADD_STA_FILE_ALIAS(alias, name, parent, mode) do { \
- if (!debugfs_create_file(alias, mode, parent, sta, \
- &iwl_dbgfs_##name##_ops)) \
- goto err; \
+ debugfs_create_file(alias, mode, parent, sta, \
+ &iwl_dbgfs_##name##_ops); \
} while (0)
#define MVM_DEBUGFS_ADD_STA_FILE(name, parent, mode) \
MVM_DEBUGFS_ADD_STA_FILE_ALIAS(#name, name, parent, mode)
if (iwl_mvm_has_tlc_offload(mvm))
MVM_DEBUGFS_ADD_STA_FILE(rs_data, dir, 0400);
-
- return;
-err:
- IWL_ERR(mvm, "Can't create the mvm station debugfs entry\n");
}
-int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
+void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
{
struct dentry *bcast_dir __maybe_unused;
char buf[100];
#endif
MVM_DEBUGFS_ADD_FILE(he_sniffer_params, mvm->debugfs_dir, 0600);
- if (!debugfs_create_bool("enable_scan_iteration_notif",
- 0600,
- mvm->debugfs_dir,
- &mvm->scan_iter_notif_enabled))
- goto err;
- if (!debugfs_create_bool("drop_bcn_ap_mode", 0600,
- mvm->debugfs_dir, &mvm->drop_bcn_ap_mode))
- goto err;
+ debugfs_create_bool("enable_scan_iteration_notif", 0600,
+ mvm->debugfs_dir, &mvm->scan_iter_notif_enabled);
+ debugfs_create_bool("drop_bcn_ap_mode", 0600, mvm->debugfs_dir,
+ &mvm->drop_bcn_ap_mode);
MVM_DEBUGFS_ADD_FILE(uapsd_noagg_bssids, mvm->debugfs_dir, S_IRUSR);
if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BCAST_FILTERING) {
bcast_dir = debugfs_create_dir("bcast_filtering",
mvm->debugfs_dir);
- if (!bcast_dir)
- goto err;
- if (!debugfs_create_bool("override", 0600,
- bcast_dir,
- &mvm->dbgfs_bcast_filtering.override))
- goto err;
+ debugfs_create_bool("override", 0600, bcast_dir,
+ &mvm->dbgfs_bcast_filtering.override);
MVM_DEBUGFS_ADD_FILE_ALIAS("filters", bcast_filters,
bcast_dir, 0600);
#ifdef CONFIG_PM_SLEEP
MVM_DEBUGFS_ADD_FILE(d3_sram, mvm->debugfs_dir, 0600);
MVM_DEBUGFS_ADD_FILE(d3_test, mvm->debugfs_dir, 0400);
- if (!debugfs_create_bool("d3_wake_sysassert", 0600,
- mvm->debugfs_dir, &mvm->d3_wake_sysassert))
- goto err;
- if (!debugfs_create_u32("last_netdetect_scans", 0400,
- mvm->debugfs_dir, &mvm->last_netdetect_scans))
- goto err;
+ debugfs_create_bool("d3_wake_sysassert", 0600, mvm->debugfs_dir,
+ &mvm->d3_wake_sysassert);
+ debugfs_create_u32("last_netdetect_scans", 0400, mvm->debugfs_dir,
+ &mvm->last_netdetect_scans);
#endif
- if (!debugfs_create_u8("ps_disabled", 0400,
- mvm->debugfs_dir, &mvm->ps_disabled))
- goto err;
- if (!debugfs_create_blob("nvm_hw", 0400,
- mvm->debugfs_dir, &mvm->nvm_hw_blob))
- goto err;
- if (!debugfs_create_blob("nvm_sw", 0400,
- mvm->debugfs_dir, &mvm->nvm_sw_blob))
- goto err;
- if (!debugfs_create_blob("nvm_calib", 0400,
- mvm->debugfs_dir, &mvm->nvm_calib_blob))
- goto err;
- if (!debugfs_create_blob("nvm_prod", 0400,
- mvm->debugfs_dir, &mvm->nvm_prod_blob))
- goto err;
- if (!debugfs_create_blob("nvm_phy_sku", 0400,
- mvm->debugfs_dir, &mvm->nvm_phy_sku_blob))
- goto err;
- if (!debugfs_create_blob("nvm_reg", S_IRUSR,
- mvm->debugfs_dir, &mvm->nvm_reg_blob))
- goto err;
+ debugfs_create_u8("ps_disabled", 0400, mvm->debugfs_dir,
+ &mvm->ps_disabled);
+ debugfs_create_blob("nvm_hw", 0400, mvm->debugfs_dir,
+ &mvm->nvm_hw_blob);
+ debugfs_create_blob("nvm_sw", 0400, mvm->debugfs_dir,
+ &mvm->nvm_sw_blob);
+ debugfs_create_blob("nvm_calib", 0400, mvm->debugfs_dir,
+ &mvm->nvm_calib_blob);
+ debugfs_create_blob("nvm_prod", 0400, mvm->debugfs_dir,
+ &mvm->nvm_prod_blob);
+ debugfs_create_blob("nvm_phy_sku", 0400, mvm->debugfs_dir,
+ &mvm->nvm_phy_sku_blob);
+ debugfs_create_blob("nvm_reg", S_IRUSR,
+ mvm->debugfs_dir, &mvm->nvm_reg_blob);
debugfs_create_file("mem", 0600, dbgfs_dir, mvm, &iwl_dbgfs_mem_ops);
* exists (before the opmode exists which removes the target.)
*/
snprintf(buf, 100, "../../%pd2", dbgfs_dir->d_parent);
- if (!debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, buf))
- goto err;
-
- return 0;
-err:
- IWL_ERR(mvm, "Can't create the mvm debugfs directory\n");
- return -ENOMEM;
+ debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, buf);
}
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
.preferred_tsf = NUM_TSF_IDS,
.found_vif = false,
};
- u32 ac;
- int ret, i, queue_limit;
- unsigned long used_hw_queues;
+ int ret, i;
lockdep_assert_held(&mvm->mutex);
INIT_LIST_HEAD(&mvmvif->time_event_data.list);
mvmvif->time_event_data.id = TE_MAX;
- /* No need to allocate data queues to P2P Device MAC.*/
- if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- vif->hw_queue[ac] = IEEE80211_INVAL_HW_QUEUE;
-
+ /* No need to allocate data queues to P2P Device MAC and NAN.*/
+ if (vif->type == NL80211_IFTYPE_P2P_DEVICE)
return 0;
- }
-
- /*
- * queues in mac80211 almost entirely independent of
- * the ones here - no real limit
- */
- queue_limit = IEEE80211_MAX_QUEUES;
-
- /*
- * Find available queues, and allocate them to the ACs. When in
- * DQA-mode they aren't really used, and this is done only so the
- * mac80211 ieee80211_check_queues() function won't fail
- */
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- u8 queue = find_first_zero_bit(&used_hw_queues, queue_limit);
-
- if (queue >= queue_limit) {
- IWL_ERR(mvm, "Failed to allocate queue\n");
- ret = -EIO;
- goto exit_fail;
- }
-
- __set_bit(queue, &used_hw_queues);
- vif->hw_queue[ac] = queue;
- }
/* Allocate the CAB queue for softAP and GO interfaces */
if (vif->type == NL80211_IFTYPE_AP ||
ieee80211_tu_to_usec(data.beacon_int * rand /
100);
} else {
- mvmvif->ap_beacon_time =
- iwl_read_prph(mvm->trans,
- DEVICE_SYSTEM_TIME_REG);
+ mvmvif->ap_beacon_time = iwl_mvm_get_systime(mvm);
}
}
rcu_read_lock();
vif = rcu_dereference(mvm->vif_id_to_mac[mac_id]);
+ mvmvif = iwl_mvm_vif_from_mac80211(vif);
switch (vif->type) {
case NL80211_IFTYPE_AP:
csa_vif != vif))
goto out_unlock;
- mvmvif = iwl_mvm_vif_from_mac80211(csa_vif);
csa_id = FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color);
if (WARN(csa_id != id_n_color,
"channel switch noa notification on unexpected vif (csa_vif=%d, notif=%d)",
return;
case NL80211_IFTYPE_STATION:
iwl_mvm_csa_client_absent(mvm, vif);
+ cancel_delayed_work_sync(&mvmvif->csa_work);
ieee80211_chswitch_done(vif, true);
break;
default:
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
const static u8 he_if_types_ext_capa_sta[] = {
[0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING,
+ [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT,
[7] = WLAN_EXT_CAPA8_OPMODE_NOTIF,
[9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT,
};
BIT(NL80211_IFTYPE_ADHOC);
hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
+ wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS);
+ hw->wiphy->features |= NL80211_FEATURE_HT_IBSS;
+
hw->wiphy->regulatory_flags |= REGULATORY_ENABLE_RELAX_NO_IR;
if (iwl_mvm_is_lar_supported(mvm))
hw->wiphy->regulatory_flags |= REGULATORY_WIPHY_SELF_MANAGED;
hw->wiphy->iftype_ext_capab = he_iftypes_ext_capa;
hw->wiphy->num_iftype_ext_capab =
ARRAY_SIZE(he_iftypes_ext_capa);
+
+ ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID);
+ ieee80211_hw_set(hw, SUPPORTS_ONLY_HE_MULTI_BSSID);
}
mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
{
- /* clear the D3 reconfig, we only need it to avoid dumping a
- * firmware coredump on reconfiguration, we shouldn't do that
- * on D3->D0 transition
- */
- if (!test_and_clear_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status)) {
- mvm->fwrt.dump.desc = &iwl_dump_desc_assert;
- iwl_fw_error_dump(&mvm->fwrt);
- }
-
/* cleanup all stale references (scan, roc), but keep the
* ucode_down ref until reconfig is complete
*/
return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0, len, &cmd);
}
+/*
+ * Driver-side handling after a channel switch on @vif; runs with
+ * mvm->mutex taken for the whole body.
+ *
+ * Returns -EIO if mvmvif->csa_failed was set (the flag is cleared) or if the
+ * AP station cannot be found for a station interface, the error from
+ * iwl_mvm_enable_beacon_filter() if that fails, and otherwise the result of
+ * iwl_mvm_power_update_ps().
+ */
+static int iwl_mvm_post_channel_switch(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ int ret;
+
+ mutex_lock(&mvm->mutex);
+
+ /* a failed switch is reported once, then the flag is reset */
+ if (mvmvif->csa_failed) {
+ mvmvif->csa_failed = false;
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ /* station-only steps; other interface types go straight to power update */
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ struct iwl_mvm_sta *mvmsta;
+
+ mvmvif->csa_bcn_pending = false;
+ mvmsta = iwl_mvm_sta_from_staid_protected(mvm,
+ mvmvif->ap_sta_id);
+
+ if (WARN_ON(!mvmsta)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ /* presumably re-enables TX towards the AP station (disable = false) */
+ iwl_mvm_sta_modify_disable_tx(mvm, mvmsta, false);
+
+ iwl_mvm_mac_ctxt_changed(mvm, vif, false, NULL);
+
+ ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
+ if (ret)
+ goto out_unlock;
+
+ iwl_mvm_stop_session_protection(mvm, vif);
+ }
+
+ mvmvif->ps_disabled = false;
+
+ ret = iwl_mvm_power_update_ps(mvm);
+
+out_unlock:
+ mutex_unlock(&mvm->mutex);
+
+ return ret;
+}
+
+/*
+ * Abort a channel switch on @vif: send CHANNEL_SWITCH_TIME_EVENT_CMD with
+ * action FW_CTXT_ACTION_REMOVE for this MAC, then run the post-channel-switch
+ * handling.  A failure of either step only triggers WARN_ON; nothing is
+ * returned to the caller.
+ */
+static void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
+{
+ struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_chan_switch_te_cmd cmd = {
+ .mac_id = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
+ mvmvif->color)),
+ .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE),
+ };
+
+ IWL_DEBUG_MAC80211(mvm, "Abort CSA on mac %d\n", mvmvif->id);
+
+ mutex_lock(&mvm->mutex);
+ WARN_ON(iwl_mvm_send_cmd_pdu(mvm,
+ WIDE_ID(MAC_CONF_GROUP,
+ CHANNEL_SWITCH_TIME_EVENT_CMD),
+ 0, sizeof(cmd), &cmd));
+ mutex_unlock(&mvm->mutex);
+
+ /* called unlocked: iwl_mvm_post_channel_switch() takes mvm->mutex itself */
+ WARN_ON(iwl_mvm_post_channel_switch(hw, vif));
+}
+
+/*
+ * Delayed-work handler for mvmvif->csa_work: aborts the channel switch on
+ * the vif that owns the work item and then calls
+ * ieee80211_chswitch_done(vif, false).
+ */
+static void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk)
+{
+ struct iwl_mvm *mvm;
+ struct iwl_mvm_vif *mvmvif;
+ struct ieee80211_vif *vif;
+
+ /* work item -> mvmvif -> the ieee80211_vif whose drv_priv holds it */
+ mvmvif = container_of(wk, struct iwl_mvm_vif, csa_work.work);
+ vif = container_of((void *)mvmvif, struct ieee80211_vif, drv_priv);
+ mvm = mvmvif->mvm;
+
+ iwl_mvm_abort_channel_switch(mvm->hw, vif);
+ ieee80211_chswitch_done(vif, false);
+}
+
static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
}
iwl_mvm_tcm_add_vif(mvm, vif);
+ INIT_DELAYED_WORK(&mvmvif->csa_work,
+ iwl_mvm_channel_switch_disconnect_wk);
if (vif->type == NL80211_IFTYPE_MONITOR)
mvm->monitor_on = true;
.frame_time_rts_th =
cpu_to_le16(vif->bss_conf.frame_time_rts_th),
};
+ int size = fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_MBSSID_HE) ?
+ sizeof(sta_ctxt_cmd) :
+ sizeof(struct iwl_he_sta_context_cmd_v1);
struct ieee80211_sta *sta;
u32 flags;
int i;
/* Set the PPE thresholds accordingly */
if (low_th >= 0 && high_th >= 0) {
- u8 ***pkt_ext_qam =
- (void *)sta_ctxt_cmd.pkt_ext.pkt_ext_qam_th;
+ struct iwl_he_pkt_ext *pkt_ext =
+ (struct iwl_he_pkt_ext *)&sta_ctxt_cmd.pkt_ext;
for (i = 0; i < MAX_HE_SUPP_NSS; i++) {
u8 bw;
for (bw = 0; bw < MAX_HE_CHANNEL_BW_INDX;
bw++) {
- pkt_ext_qam[i][bw][0] = low_th;
- pkt_ext_qam[i][bw][1] = high_th;
+ pkt_ext->pkt_ext_qam_th[i][bw][0] =
+ low_th;
+ pkt_ext->pkt_ext_qam_th[i][bw][1] =
+ high_th;
}
}
(vif->bss_conf.uora_ocw_range >> 3) & 0x7;
}
- /* TODO: support Multi BSSID IE */
+ if (vif->bss_conf.nontransmitted) {
+ flags |= STA_CTXT_HE_REF_BSSID_VALID;
+ ether_addr_copy(sta_ctxt_cmd.ref_bssid_addr,
+ vif->bss_conf.transmitter_bssid);
+ sta_ctxt_cmd.max_bssid_indicator =
+ vif->bss_conf.bssid_indicator;
+ sta_ctxt_cmd.bssid_index = vif->bss_conf.bssid_index;
+ sta_ctxt_cmd.ema_ap = vif->bss_conf.ema_ap;
+ sta_ctxt_cmd.profile_periodicity =
+ vif->bss_conf.profile_periodicity;
+ }
sta_ctxt_cmd.flags = cpu_to_le32(flags);
if (iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(STA_HE_CTXT_CMD,
DATA_PATH_GROUP, 0),
- 0, sizeof(sta_ctxt_cmd), &sta_ctxt_cmd))
+ 0, size, &sta_ctxt_cmd))
IWL_ERR(mvm, "Failed to config FW to work HE!\n");
}
iwl_mvm_mac_ctxt_remove(mvm, vif);
- kfree(mvmvif->ap_wep_key);
- mvmvif->ap_wep_key = NULL;
-
mutex_unlock(&mvm->mutex);
}
ret = iwl_mvm_update_sta(mvm, vif, sta);
} else if (old_state == IEEE80211_STA_ASSOC &&
new_state == IEEE80211_STA_AUTHORIZED) {
- /* if wep is used, need to set the key for the station now */
- if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) {
- mvm_sta->wep_key =
- kmemdup(mvmvif->ap_wep_key,
- sizeof(*mvmvif->ap_wep_key) +
- mvmvif->ap_wep_key->keylen,
- GFP_KERNEL);
- if (!mvm_sta->wep_key) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- ret = iwl_mvm_set_sta_key(mvm, vif, sta,
- mvm_sta->wep_key,
- STA_KEY_IDX_INVALID);
- } else {
- ret = 0;
- }
+ ret = 0;
/* we don't support TDLS during DCM */
if (iwl_mvm_phy_ctx_count(mvm) > 1)
NL80211_TDLS_DISABLE_LINK);
}
- /* Remove STA key if this is an AP using WEP */
- if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) {
- int rm_ret = iwl_mvm_remove_sta_key(mvm, vif, sta,
- mvm_sta->wep_key);
-
- if (!ret)
- ret = rm_ret;
- kfree(mvm_sta->wep_key);
- mvm_sta->wep_key = NULL;
- }
-
if (unlikely(ret &&
test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED,
&mvm->status)))
struct ieee80211_sta *sta, u32 changed)
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+ if (changed & (IEEE80211_RC_BW_CHANGED |
+ IEEE80211_RC_SUPP_RATES_CHANGED |
+ IEEE80211_RC_NSS_CHANGED))
+ iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band,
+ true);
if (vif->type == NL80211_IFTYPE_STATION &&
changed & IEEE80211_RC_NSS_CHANGED)
break;
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_WEP104:
- if (vif->type == NL80211_IFTYPE_AP) {
- struct iwl_mvm_vif *mvmvif =
- iwl_mvm_vif_from_mac80211(vif);
-
- mvmvif->ap_wep_key = kmemdup(key,
- sizeof(*key) + key->keylen,
- GFP_KERNEL);
- if (!mvmvif->ap_wep_key)
- return -ENOMEM;
- }
-
- if (vif->type != NL80211_IFTYPE_STATION)
- return 0;
- break;
+ if (vif->type == NL80211_IFTYPE_STATION)
+ break;
+ if (iwl_mvm_has_new_tx_api(mvm))
+ return -EOPNOTSUPP;
+ /* support HW crypto on TX */
+ return 0;
default:
/* currently FW supports only one optional cipher scheme */
if (hw->n_cipher_schemes &&
ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset);
if (ret) {
IWL_WARN(mvm, "set key failed\n");
+ key->hw_key_idx = STA_KEY_IDX_INVALID;
/*
* can't add key for RX, but we don't need it
- * in the device for TX so still return 0
+ * in the device for TX so still return 0,
+ * unless we have new TX API where we cannot
+ * put key material into the TX_CMD
*/
- key->hw_key_idx = STA_KEY_IDX_INVALID;
- ret = 0;
+ if (iwl_mvm_has_new_tx_api(mvm))
+ ret = -EOPNOTSUPP;
+ else
+ ret = 0;
}
break;
struct ieee80211_vif *vif,
int duration)
{
- int res, time_reg = DEVICE_SYSTEM_TIME_REG;
+ int res;
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm_time_event_data *te_data = &mvmvif->hs_time_event_data;
static const u16 time_event_response[] = { HOT_SPOT_CMD };
0);
/* Set the time and duration */
- tail->apply_time = cpu_to_le32(iwl_read_prph(mvm->trans, time_reg));
+ tail->apply_time = cpu_to_le32(iwl_mvm_get_systime(mvm));
delay = AUX_ROC_MIN_DELAY;
req_dur = MSEC_TO_TU(duration);
.action = cpu_to_le32(FW_CTXT_ACTION_ADD),
.tsf = cpu_to_le32(chsw->timestamp),
.cs_count = chsw->count,
+ .cs_mode = chsw->block_tx,
};
lockdep_assert_held(&mvm->mutex);
+ if (chsw->delay)
+ cmd.cs_delayed_bcn_count =
+ DIV_ROUND_UP(chsw->delay, vif->bss_conf.beacon_int);
+
return iwl_mvm_send_cmd_pdu(mvm,
WIDE_ID(MAC_CONF_GROUP,
CHANNEL_SWITCH_TIME_EVENT_CMD),
0, sizeof(cmd), &cmd);
}
+#define IWL_MAX_CSA_BLOCK_TX 1500
static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *chsw)
((vif->bss_conf.beacon_int * (chsw->count - 1) -
IWL_MVM_CHANNEL_SWITCH_TIME_CLIENT) * 1024);
- if (chsw->block_tx)
+ if (chsw->block_tx) {
iwl_mvm_csa_client_absent(mvm, vif);
+ /*
+ * In case of an undetermined or long switch time with
+ * immediate quiet, monitor status to gracefully disconnect
+ */
+ if (!chsw->count ||
+ chsw->count * vif->bss_conf.beacon_int >
+ IWL_MAX_CSA_BLOCK_TX)
+ schedule_delayed_work(&mvmvif->csa_work,
+ msecs_to_jiffies(IWL_MAX_CSA_BLOCK_TX));
+ }
if (mvmvif->bf_data.bf_enabled) {
ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
iwl_mvm_schedule_csa_period(mvm, vif,
vif->bss_conf.beacon_int,
apply_time);
+
+ mvmvif->csa_count = chsw->count;
+ mvmvif->csa_misbehave = false;
break;
default:
break;
return ret;
}
-static int iwl_mvm_post_channel_switch(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+static void iwl_mvm_channel_switch_rx_beacon(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_channel_switch *chsw)
{
- struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
- int ret;
-
- mutex_lock(&mvm->mutex);
-
- if (mvmvif->csa_failed) {
- mvmvif->csa_failed = false;
- ret = -EIO;
- goto out_unlock;
- }
-
- if (vif->type == NL80211_IFTYPE_STATION) {
- struct iwl_mvm_sta *mvmsta;
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_chan_switch_te_cmd cmd = {
+ .mac_id = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
+ mvmvif->color)),
+ .action = cpu_to_le32(FW_CTXT_ACTION_MODIFY),
+ .tsf = cpu_to_le32(chsw->timestamp),
+ .cs_count = chsw->count,
+ .cs_mode = chsw->block_tx,
+ };
- mvmvif->csa_bcn_pending = false;
- mvmsta = iwl_mvm_sta_from_staid_protected(mvm,
- mvmvif->ap_sta_id);
+ if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_CS_MODIFY))
+ return;
- if (WARN_ON(!mvmsta)) {
- ret = -EIO;
- goto out_unlock;
+ if (chsw->count >= mvmvif->csa_count && chsw->block_tx) {
+ if (mvmvif->csa_misbehave) {
+ /* Second time, give up on this AP */
+ iwl_mvm_abort_channel_switch(hw, vif);
+ ieee80211_chswitch_done(vif, false);
+ mvmvif->csa_misbehave = false;
+ return;
}
-
- iwl_mvm_sta_modify_disable_tx(mvm, mvmsta, false);
-
- iwl_mvm_mac_ctxt_changed(mvm, vif, false, NULL);
-
- ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
- if (ret)
- goto out_unlock;
-
- iwl_mvm_stop_session_protection(mvm, vif);
+ mvmvif->csa_misbehave = true;
}
+ mvmvif->csa_count = chsw->count;
- mvmvif->ps_disabled = false;
-
- ret = iwl_mvm_power_update_ps(mvm);
+ IWL_DEBUG_MAC80211(mvm, "Modify CSA on mac %d\n", mvmvif->id);
-out_unlock:
- mutex_unlock(&mvm->mutex);
-
- return ret;
+ WARN_ON(iwl_mvm_send_cmd_pdu(mvm,
+ WIDE_ID(MAC_CONF_GROUP,
+ CHANNEL_SWITCH_TIME_EVENT_CMD),
+ CMD_ASYNC, sizeof(cmd), &cmd));
}
static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop)
.channel_switch = iwl_mvm_channel_switch,
.pre_channel_switch = iwl_mvm_pre_channel_switch,
.post_channel_switch = iwl_mvm_post_channel_switch,
+ .abort_channel_switch = iwl_mvm_abort_channel_switch,
+ .channel_switch_rx_beacon = iwl_mvm_channel_switch_rx_beacon,
.tdls_channel_switch = iwl_mvm_tdls_channel_switch,
.tdls_cancel_channel_switch = iwl_mvm_tdls_cancel_channel_switch,
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
bool csa_countdown;
bool csa_failed;
u16 csa_target_freq;
+ u16 csa_count;
+ u16 csa_misbehave;
+ struct delayed_work csa_work;
/* Indicates that we are waiting for a beacon on a new channel */
bool csa_bcn_pending;
netdev_features_t features;
struct iwl_probe_resp_data __rcu *probe_resp_data;
- struct ieee80211_key_conf *ap_wep_key;
};
static inline struct iwl_mvm_vif *
* @IWL_MVM_STATUS_IN_HW_RESTART: HW restart is active
* @IWL_MVM_STATUS_IN_D0I3: NIC is in D0i3
* @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running
- * @IWL_MVM_STATUS_D3_RECONFIG: D3 reconfiguration is being done
* @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running
* @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA
*/
IWL_MVM_STATUS_IN_HW_RESTART,
IWL_MVM_STATUS_IN_D0I3,
IWL_MVM_STATUS_ROC_AUX_RUNNING,
- IWL_MVM_STATUS_D3_RECONFIG,
IWL_MVM_STATUS_FIRMWARE_RUNNING,
IWL_MVM_STATUS_NEED_FLUSH_P2P,
};
u8 first_antenna(u8 mask);
u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx);
void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime);
+u32 iwl_mvm_get_systime(struct iwl_mvm *mvm);
/* Tx / Host Commands */
int __must_check iwl_mvm_send_cmd(struct iwl_mvm *mvm,
struct iwl_rx_cmd_buffer *rxb);
void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
struct iwl_rx_cmd_buffer *rxb, int queue);
-void iwl_mvm_rx_monitor_ndp(struct iwl_mvm *mvm, struct napi_struct *napi,
- struct iwl_rx_cmd_buffer *rxb, int queue);
+void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
+ struct iwl_rx_cmd_buffer *rxb, int queue);
void iwl_mvm_rx_frame_release(struct iwl_mvm *mvm, struct napi_struct *napi,
struct iwl_rx_cmd_buffer *rxb, int queue);
int iwl_mvm_notify_rx_queue(struct iwl_mvm *mvm, u32 rxq_mask,
/* MVM debugfs */
#ifdef CONFIG_IWLWIFI_DEBUGFS
-int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir);
+void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir);
void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
void iwl_mvm_vif_dbgfs_clean(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
#else
-static inline int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm,
- struct dentry *dbgfs_dir)
+static inline void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm,
+ struct dentry *dbgfs_dir)
{
- return 0;
}
static inline void
iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm)
{
lockdep_assert_held(&mvm->mutex);
- /* If IWL_MVM_STATUS_HW_RESTART_REQUESTED bit is set then we received
- * an assert. Since we failed to bring the interface up, mac80211
- * will not attempt to reconfig the device,
- * which handles the dump collection in assert flow,
- * so trigger dump collection here.
- */
- if (test_and_clear_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED,
- &mvm->status))
- iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert,
- false, 0);
-
iwl_fw_cancel_timestamp(&mvm->fwrt);
clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status);
iwl_fwrt_stop_device(&mvm->fwrt);
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
min_backoff = iwl_mvm_min_backoff(mvm);
iwl_mvm_thermal_initialize(mvm, min_backoff);
- err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
- if (err)
- goto out_unregister;
+ iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
if (!iwl_mvm_has_new_rx_stats_api(mvm))
memset(&mvm->rx_stats_v3, 0,
return op_mode;
- out_unregister:
- if (iwlmvm_mod_params.init_dbg)
- return op_mode;
-
- ieee80211_unregister_hw(mvm->hw);
- mvm->hw_registered = false;
- iwl_mvm_leds_exit(mvm);
- iwl_mvm_thermal_exit(mvm);
out_free:
iwl_fw_flush_dump(&mvm->fwrt);
iwl_fw_runtime_free(&mvm->fwrt);
else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE))
iwl_mvm_rx_frame_release(mvm, napi, rxb, 0);
else if (cmd == WIDE_ID(DATA_PATH_GROUP, RX_NO_DATA_NOTIF))
- iwl_mvm_rx_monitor_ndp(mvm, napi, rxb, 0);
+ iwl_mvm_rx_monitor_no_data(mvm, napi, rxb, 0);
else
iwl_mvm_rx_common(mvm, rxb, pkt);
}
* can't recover this since we're already half suspended.
*/
if (!mvm->fw_restart && fw_error) {
- iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert,
- false, 0);
+ iwl_fw_error_collect(&mvm->fwrt);
} else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
struct iwl_mvm_reprobe *reprobe;
}
}
+ iwl_fw_error_collect(&mvm->fwrt);
+
if (fw_error && mvm->fw_restart > 0)
mvm->fw_restart--;
set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status);
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
rcu_read_unlock();
}
+/*
+ * rs_fw_get_max_amsdu_len - derive the max A-MSDU length for a station
+ * @sta: station whose VHT/HT capabilities are examined
+ *
+ * VHT capabilities take precedence over HT ones. Returns 0 when the
+ * station supports neither VHT nor HT, since no A-MSDU is enabled in
+ * legacy mode.
+ */
+static u16 rs_fw_get_max_amsdu_len(struct ieee80211_sta *sta)
+{
+	const struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
+	const struct ieee80211_sta_ht_cap *ht_cap = &sta->ht_cap;
+
+	/*
+	 * Both pointers are addresses of members embedded in @sta and can
+	 * never be NULL, so only the "supported" flags need checking.
+	 */
+	if (vht_cap->vht_supported) {
+		switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
+		case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+			return IEEE80211_MAX_MPDU_LEN_VHT_11454;
+		case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
+			return IEEE80211_MAX_MPDU_LEN_VHT_7991;
+		default:
+			return IEEE80211_MAX_MPDU_LEN_VHT_3895;
+		}
+	}
+
+	if (ht_cap->ht_supported) {
+		/*
+		 * agg is offloaded so we need to assume that agg
+		 * are enabled and max mpdu in ampdu is 4095
+		 * (spec 802.11-2016 9.3.2.1)
+		 */
+		if (ht_cap->cap & IEEE80211_HT_CAP_MAX_AMSDU)
+			return IEEE80211_MAX_MPDU_LEN_HT_BA;
+
+		return IEEE80211_MAX_MPDU_LEN_HT_3839;
+	}
+
+	/* in legacy mode no amsdu is enabled so return zero */
+	return 0;
+}
+
void rs_fw_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
enum nl80211_band band, bool update)
{
struct iwl_lq_sta_rs_fw *lq_sta = &mvmsta->lq_sta.rs_fw;
u32 cmd_id = iwl_cmd_id(TLC_MNG_CONFIG_CMD, DATA_PATH_GROUP, 0);
struct ieee80211_supported_band *sband;
+ u16 max_amsdu_len = rs_fw_get_max_amsdu_len(sta);
struct iwl_tlc_config_cmd cfg_cmd = {
.sta_id = mvmsta->sta_id,
.max_ch_width = update ?
rs_fw_bw_from_sta_bw(sta) : RATE_MCS_CHAN_WIDTH_20,
.flags = cpu_to_le16(rs_fw_set_config_flags(mvm, sta)),
.chains = rs_fw_set_active_chains(iwl_mvm_get_valid_tx_ant(mvm)),
- .max_mpdu_len = cpu_to_le16(sta->max_amsdu_len),
.sgi_ch_width_supp = rs_fw_sgi_cw_support(sta),
+ .max_mpdu_len = cpu_to_le16(max_amsdu_len),
.amsdu = iwl_mvm_is_csum_supported(mvm),
};
int ret;
sband = hw->wiphy->bands[band];
rs_fw_set_supp_rates(sta, sband, &cfg_cmd);
+ /*
+ * since TLC offload works with one mode we can assume
+ * that only vht/ht is used and also set it as station max amsdu
+ */
+ sta->max_amsdu_len = max_amsdu_len;
+
ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cfg_cmd), &cfg_cmd);
if (ret)
IWL_ERR(mvm, "Failed to send rate scale config (%d)\n", ret);
#define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \
_MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz, struct iwl_lq_sta)
#define MVM_DEBUGFS_ADD_FILE_RS(name, parent, mode) do { \
- if (!debugfs_create_file(#name, mode, parent, lq_sta, \
- &iwl_dbgfs_##name##_ops)) \
- goto err; \
+ debugfs_create_file(#name, mode, parent, lq_sta, \
+ &iwl_dbgfs_##name##_ops); \
} while (0)
MVM_DEBUGFS_READ_WRITE_FILE_OPS(ss_force, 32);
&lq_sta->pers.dbg_fixed_txp_reduction);
MVM_DEBUGFS_ADD_FILE_RS(ss_force, dir, 0600);
- return;
-err:
- IWL_ERR((struct iwl_mvm *)mvm, "Can't create debugfs entity\n");
}
void rs_remove_sta_debugfs(void *mvm, void *mvm_sta)
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
rcu_read_unlock();
}
-void iwl_mvm_rx_monitor_ndp(struct iwl_mvm *mvm, struct napi_struct *napi,
- struct iwl_rx_cmd_buffer *rxb, int queue)
+void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
+ struct iwl_rx_cmd_buffer *rxb, int queue)
{
struct ieee80211_rx_status *rx_status;
struct iwl_rx_packet *pkt = rxb_addr(rxb);
if (unlikely(test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)))
return;
- /* Currently only NDP type is supported */
- if (info_type != RX_NO_DATA_INFO_TYPE_NDP)
- return;
-
energy_a = (rssi & RX_NO_DATA_CHAIN_A_MSK) >> RX_NO_DATA_CHAIN_A_POS;
energy_b = (rssi & RX_NO_DATA_CHAIN_B_MSK) >> RX_NO_DATA_CHAIN_B_POS;
channel = (rssi & RX_NO_DATA_CHANNEL_MSK) >> RX_NO_DATA_CHANNEL_POS;
/* 0-length PSDU */
rx_status->flag |= RX_FLAG_NO_PSDU;
- /* currently this is the only type for which we get this notif */
- rx_status->zero_length_psdu_type =
- IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING;
+
+ switch (info_type) {
+ case RX_NO_DATA_INFO_TYPE_NDP:
+ rx_status->zero_length_psdu_type =
+ IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING;
+ break;
+ case RX_NO_DATA_INFO_TYPE_MU_UNMATCHED:
+ case RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED:
+ rx_status->zero_length_psdu_type =
+ IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED;
+ break;
+ default:
+ rx_status->zero_length_psdu_type =
+ IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR;
+ break;
+ }
/* This may be overridden by iwl_mvm_rx_he() to HE_RU */
switch (rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK) {
dwell->extended = IWL_SCAN_DWELL_EXTENDED;
}
-static void iwl_mvm_fill_channels(struct iwl_mvm *mvm, u8 *channels)
+static void iwl_mvm_fill_channels(struct iwl_mvm *mvm, u8 *channels,
+ u32 max_channels)
{
struct ieee80211_supported_band *band;
int i, j = 0;
band = &mvm->nvm_data->bands[NL80211_BAND_2GHZ];
- for (i = 0; i < band->n_channels; i++, j++)
+ for (i = 0; i < band->n_channels && j < max_channels; i++, j++)
channels[j] = band->channels[i].hw_value;
band = &mvm->nvm_data->bands[NL80211_BAND_5GHZ];
- for (i = 0; i < band->n_channels; i++, j++)
+ for (i = 0; i < band->n_channels && j < max_channels; i++, j++)
channels[j] = band->channels[i].hw_value;
}
static void iwl_mvm_fill_scan_config_v1(struct iwl_mvm *mvm, void *config,
- u32 flags, u8 channel_flags)
+ u32 flags, u8 channel_flags,
+ u32 max_channels)
{
enum iwl_mvm_scan_type type = iwl_mvm_get_scan_type(mvm, NULL);
struct iwl_scan_config_v1 *cfg = config;
cfg->bcast_sta_id = mvm->aux_sta.sta_id;
cfg->channel_flags = channel_flags;
- iwl_mvm_fill_channels(mvm, cfg->channel_array);
+ iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels);
}
static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config,
- u32 flags, u8 channel_flags)
+ u32 flags, u8 channel_flags,
+ u32 max_channels)
{
struct iwl_scan_config *cfg = config;
cfg->bcast_sta_id = mvm->aux_sta.sta_id;
cfg->channel_flags = channel_flags;
- iwl_mvm_fill_channels(mvm, cfg->channel_array);
+ iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels);
}
int iwl_mvm_config_scan(struct iwl_mvm *mvm)
u8 channel_flags;
if (WARN_ON(num_channels > mvm->fw->ucode_capa.n_scan_channels))
- return -ENOBUFS;
+ num_channels = mvm->fw->ucode_capa.n_scan_channels;
if (iwl_mvm_is_cdb_supported(mvm)) {
type = iwl_mvm_get_scan_type_band(mvm, NULL,
flags |= (iwl_mvm_is_scan_fragmented(hb_type)) ?
SCAN_CONFIG_FLAG_SET_LMAC2_FRAGMENTED :
SCAN_CONFIG_FLAG_CLEAR_LMAC2_FRAGMENTED;
- iwl_mvm_fill_scan_config(mvm, cfg, flags, channel_flags);
+ iwl_mvm_fill_scan_config(mvm, cfg, flags, channel_flags,
+ num_channels);
} else {
- iwl_mvm_fill_scan_config_v1(mvm, cfg, flags, channel_flags);
+ iwl_mvm_fill_scan_config_v1(mvm, cfg, flags, channel_flags,
+ num_channels);
}
cmd.data[0] = cfg;
* Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid);
list_del_init(&mvmtxq->list);
+ local_bh_disable();
iwl_mvm_mac_itxq_xmit(mvm->hw, txq);
+ local_bh_enable();
}
mutex_unlock(&mvm->mutex);
static const u8 _maddr[] = {0x03, 0x00, 0x00, 0x00, 0x00, 0x00};
const u8 *maddr = _maddr;
struct iwl_trans_txq_scd_cfg cfg = {
- .fifo = IWL_MVM_TX_FIFO_MCAST,
+ .fifo = vif->type == NL80211_IFTYPE_AP ?
+ IWL_MVM_TX_FIFO_MCAST : IWL_MVM_TX_FIFO_BE,
.sta_id = msta->sta_id,
.tid = 0,
.aggregate = false,
iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg,
timeout);
- if (mvmvif->ap_wep_key) {
- u8 key_offset = iwl_mvm_set_fw_key_idx(mvm);
-
- __set_bit(key_offset, mvm->fw_key_table);
-
- if (key_offset == STA_KEY_IDX_INVALID)
- return -ENOSPC;
-
- ret = iwl_mvm_send_sta_key(mvm, mvmvif->mcast_sta.sta_id,
- mvmvif->ap_wep_key, true, 0, NULL, 0,
- key_offset, 0);
- if (ret)
- return ret;
- }
-
return 0;
}
iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0);
- if (mvmvif->ap_wep_key) {
- int i;
-
- if (!__test_and_clear_bit(mvmvif->ap_wep_key->hw_key_idx,
- mvm->fw_key_table)) {
- IWL_ERR(mvm, "offset %d not used in fw key table.\n",
- mvmvif->ap_wep_key->hw_key_idx);
- return -ENOENT;
- }
-
- /* track which key was deleted last */
- for (i = 0; i < STA_KEY_MAX_NUM; i++) {
- if (mvm->fw_key_deleted[i] < U8_MAX)
- mvm->fw_key_deleted[i]++;
- }
- mvm->fw_key_deleted[mvmvif->ap_wep_key->hw_key_idx] = 0;
- ret = __iwl_mvm_remove_sta_key(mvm, mvmvif->mcast_sta.sta_id,
- mvmvif->ap_wep_key, true);
- if (ret)
- return ret;
- }
-
ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id);
if (ret)
IWL_WARN(mvm, "Failed sending remove station\n");
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* the BA window. To be used for UAPSD only.
* @ptk_pn: per-queue PTK PN data structures
* @dup_data: per queue duplicate packet detection data
- * @wep_key: used in AP mode. Is a duplicate of the WEP key.
* @deferred_traffic_tid_map: indication bitmap of deferred traffic per-TID
* @tx_ant: the index of the antenna to use for data tx to this station. Only
* used during connection establishment (e.g. for the 4 way handshake
struct iwl_mvm_key_pn __rcu *ptk_pn[4];
struct iwl_mvm_rxq_dup_data *dup_data;
- struct ieee80211_key_conf *wep_key;
-
u8 reserved_queue;
/* Temporary, until the new TLC will control the Tx protection */
*
* Copyright(c) 2014 Intel Mobile Communications GmbH
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(C) 2018 Intel Corporation
+ * Copyright(C) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
*
* Copyright(c) 2014 Intel Mobile Communications GmbH
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(C) 2018 Intel Corporation
+ * Copyright(C) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
/* we only send requests to our switching peer - update sent time */
if (state == IWL_MVM_TDLS_SW_REQ_SENT)
- mvm->tdls_cs.peer.sent_timestamp =
- iwl_read_prph(mvm->trans, DEVICE_SYSTEM_TIME_REG);
+ mvm->tdls_cs.peer.sent_timestamp = iwl_mvm_get_systime(mvm);
if (state == IWL_MVM_TDLS_SW_IDLE)
mvm->tdls_cs.cur_sta_id = IWL_MVM_INVALID_STA;
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
break;
}
iwl_mvm_csa_client_absent(mvm, te_data->vif);
+ cancel_delayed_work_sync(&mvmvif->csa_work);
ieee80211_chswitch_done(te_data->vif, true);
break;
default:
cancel_delayed_work_sync(&mvmvif->uapsd_nonagg_detected_wk);
}
+/*
+ * Read the device system time register.
+ *
+ * Devices of family IWL_DEVICE_FAMILY_22000 or newer whose config
+ * provides a non-zero gp2_reg_addr are read through that address;
+ * all others are read through DEVICE_SYSTEM_TIME_REG.
+ */
+u32 iwl_mvm_get_systime(struct iwl_mvm *mvm)
+{
+	u32 time_reg = mvm->trans->cfg->gp2_reg_addr;
+
+	/* fall back to the default register when no override applies */
+	if (mvm->trans->cfg->device_family < IWL_DEVICE_FAMILY_22000 ||
+	    !time_reg)
+		time_reg = DEVICE_SYSTEM_TIME_REG;
+
+	return iwl_read_prph(mvm->trans, time_reg);
+}
void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
{
iwl_mvm_power_update_device(mvm);
}
- *gp2 = iwl_read_prph(mvm->trans, DEVICE_SYSTEM_TIME_REG);
+ *gp2 = iwl_mvm_get_systime(mvm);
*boottime = ktime_get_boot_ns();
if (!ps_disabled) {
{IWL_PCI_DEVICE(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)},
{IWL_PCI_DEVICE(0xA0F0, 0x4070, iwl_ax101_cfg_qu_hr)},
- {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x0088, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x008C, iwl22260_2ax_cfg)},
+ {IWL_PCI_DEVICE(0x2723, 0x0080, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x0084, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x0088, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)},
{IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)},
{IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x4080, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x4088, iwl22260_2ax_cfg)},
-
- {IWL_PCI_DEVICE(0x1a56, 0x1653, killer1650w_2ax_cfg)},
- {IWL_PCI_DEVICE(0x1a56, 0x1654, killer1650x_2ax_cfg)},
+ {IWL_PCI_DEVICE(0x2723, 0x2080, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)},
{IWL_PCI_DEVICE(0x2725, 0x0090, iwlax210_2ax_cfg_so_hr_a0)},
{IWL_PCI_DEVICE(0x7A70, 0x0090, iwlax210_2ax_cfg_so_hr_a0)},
}
/* register transport layer debugfs here */
- ret = iwl_trans_pcie_dbgfs_register(iwl_trans);
- if (ret)
- goto out_free_drv;
+ iwl_trans_pcie_dbgfs_register(iwl_trans);
/* if RTPM is in use, enable it in our device */
if (iwl_trans->runtime_pm_mode != IWL_PLAT_PM_MODE_DISABLED) {
return 0;
-out_free_drv:
- iwl_drv_stop(iwl_trans->drv);
out_free_trans:
iwl_trans_pcie_free(iwl_trans);
return ret;
* @page: driver's pointer to the rxb page
* @invalid: rxb is in driver ownership - not owned by HW
* @vid: index of this rxb in the global table
- * @size: size used from the buffer
*/
struct iwl_rx_mem_buffer {
dma_addr_t page_dma;
u16 vid;
bool invalid;
struct list_head list;
- u32 size;
};
/**
u32 unhandled;
};
-#define IWL_RX_TD_TYPE_MSK 0xff000000
-#define IWL_RX_TD_SIZE_MSK 0x00ffffff
-#define IWL_RX_TD_SIZE_2K BIT(11)
-#define IWL_RX_TD_TYPE 0
-
/**
* struct iwl_rx_transfer_desc - transfer descriptor
- * @type_n_size: buffer type (bit 0: external buff valid,
- * bit 1: optional footer valid, bit 2-7: reserved)
- * and buffer size
* @addr: ptr to free buffer start address
* @rbid: unique tag of the buffer
* @reserved: reserved
*/
struct iwl_rx_transfer_desc {
- __le32 type_n_size;
- __le64 addr;
__le16 rbid;
- __le16 reserved;
+ __le16 reserved[3];
+ __le64 addr;
} __packed;
-#define IWL_RX_CD_SIZE 0xffffff00
+#define IWL_RX_CD_FLAGS_FRAGMENTED BIT(0)
/**
* struct iwl_rx_completion_desc - completion descriptor
- * @type: buffer type (bit 0: external buff valid,
- * bit 1: optional footer valid, bit 2-7: reserved)
- * @status: status of the completion
* @reserved1: reserved
* @rbid: unique tag of the received buffer
- * @size: buffer size, masked by IWL_RX_CD_SIZE
+ * @flags: flags (0: fragmented, all others: reserved)
* @reserved2: reserved
*/
struct iwl_rx_completion_desc {
- u8 type;
- u8 status;
- __le16 reserved1;
+ __le32 reserved1;
__le16 rbid;
- __le32 size;
- u8 reserved2[22];
+ u8 flags;
+ u8 reserved2[25];
} __packed;
/**
void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state);
void iwl_trans_pcie_dump_regs(struct iwl_trans *trans);
-void iwl_trans_sync_nmi(struct iwl_trans *trans);
+void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans);
#ifdef CONFIG_IWLWIFI_DEBUGFS
-int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans);
+void iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans);
#else
-static inline int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
-{
- return 0;
-}
+static inline void iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) { }
#endif
int iwl_pci_fw_exit_d0i3(struct iwl_trans *trans);
if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_22560) {
struct iwl_rx_transfer_desc *bd = rxq->bd;
- bd[rxq->write].type_n_size =
- cpu_to_le32((IWL_RX_TD_TYPE & IWL_RX_TD_TYPE_MSK) |
- ((IWL_RX_TD_SIZE_2K >> 8) & IWL_RX_TD_SIZE_MSK));
+ BUILD_BUG_ON(sizeof(*bd) != 2 * sizeof(u64));
+
bd[rxq->write].addr = cpu_to_le64(rxb->page_dma);
bd[rxq->write].rbid = cpu_to_le16(rxb->vid);
} else {
.truesize = max_len,
};
- if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_22560)
- rxcb.status = rxq->cd[i].status;
-
pkt = rxb_addr(&rxcb);
if (pkt->len_n_flags == cpu_to_le32(FH_RSCSR_FRAME_INVALID)) {
struct iwl_rx_mem_buffer *rxb;
u16 vid;
+ BUILD_BUG_ON(sizeof(struct iwl_rx_completion_desc) != 32);
+
if (!trans->cfg->mq_rx_supported) {
rxb = rxq->queue[i];
rxq->queue[i] = NULL;
IWL_DEBUG_RX(trans, "Got virtual RB ID %u\n", (u32)rxb->vid);
- if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_22560)
- rxb->size = le32_to_cpu(rxq->cd[i].size) & IWL_RX_CD_SIZE;
-
rxb->invalid = true;
return rxb;
"Hardware error detected. Restarting.\n");
isr_stats->hw++;
+ trans->hw_error = true;
iwl_pcie_irq_handle_error(trans);
}
#ifdef CONFIG_IWLWIFI_DEBUGFS
/* create and remove of files */
#define DEBUGFS_ADD_FILE(name, parent, mode) do { \
- if (!debugfs_create_file(#name, mode, parent, trans, \
- &iwl_dbgfs_##name##_ops)) \
- goto err; \
+ debugfs_create_file(#name, mode, parent, trans, \
+ &iwl_dbgfs_##name##_ops); \
} while (0)
/* file operation */
};
/* Create the debugfs files and directories */
-int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
+void iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
{
struct dentry *dir = trans->dbgfs_dir;
DEBUGFS_ADD_FILE(fh_reg, dir, 0400);
DEBUGFS_ADD_FILE(rfkill, dir, 0600);
DEBUGFS_ADD_FILE(monitor_data, dir, 0400);
- return 0;
-
-err:
- IWL_ERR(trans, "failed to create the trans debugfs entry\n");
- return -ENOMEM;
}
static void iwl_trans_pcie_debugfs_cleanup(struct iwl_trans *trans)
iwl_trans_pcie_dump_pointers(struct iwl_trans *trans,
struct iwl_fw_error_dump_fw_mon *fw_mon_data)
{
- u32 base, write_ptr, wrap_cnt;
+ u32 base, base_high, write_ptr, write_ptr_val, wrap_cnt;
- /* If there was a dest TLV - use the values from there */
- if (trans->ini_valid) {
+ if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
+ base = DBGC_CUR_DBGBUF_BASE_ADDR_LSB;
+ base_high = DBGC_CUR_DBGBUF_BASE_ADDR_MSB;
+ write_ptr = DBGC_CUR_DBGBUF_STATUS;
+ wrap_cnt = DBGC_DBGBUF_WRAP_AROUND;
+ } else if (trans->ini_valid) {
base = iwl_umac_prph(trans, MON_BUFF_BASE_ADDR_VER2);
write_ptr = iwl_umac_prph(trans, MON_BUFF_WRPTR_VER2);
wrap_cnt = iwl_umac_prph(trans, MON_BUFF_CYCLE_CNT_VER2);
write_ptr = MON_BUFF_WRPTR;
wrap_cnt = MON_BUFF_CYCLE_CNT;
}
- fw_mon_data->fw_mon_wr_ptr =
- cpu_to_le32(iwl_read_prph(trans, write_ptr));
+
+ write_ptr_val = iwl_read_prph(trans, write_ptr);
fw_mon_data->fw_mon_cycle_cnt =
cpu_to_le32(iwl_read_prph(trans, wrap_cnt));
fw_mon_data->fw_mon_base_ptr =
cpu_to_le32(iwl_read_prph(trans, base));
+ if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
+ fw_mon_data->fw_mon_base_high_ptr =
+ cpu_to_le32(iwl_read_prph(trans, base_high));
+ write_ptr_val &= DBGC_CUR_DBGBUF_STATUS_OFFSET_MSK;
+ }
+ fw_mon_data->fw_mon_wr_ptr = cpu_to_le32(write_ptr_val);
}
static u32
u32 len = 0;
if ((trans->num_blocks &&
- trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) ||
- (trans->dbg_dest_tlv && !trans->ini_valid) ||
- (trans->ini_valid && trans->num_blocks)) {
+ (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000 ||
+ trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210 ||
+ trans->ini_valid)) ||
+ (trans->dbg_dest_tlv && !trans->ini_valid)) {
struct iwl_fw_error_dump_fw_mon *fw_mon_data;
(*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FW_MONITOR);
len = sizeof(*dump_data);
/* host commands */
- len += sizeof(*data) +
- cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE);
+ if (dump_mask & BIT(IWL_FW_ERROR_DUMP_TXCMD))
+ len += sizeof(*data) +
+ cmdq->n_window * (sizeof(*txcmd) +
+ TFD_MAX_PAYLOAD_SIZE);
/* FW monitor */
if (dump_mask & BIT(IWL_FW_ERROR_DUMP_FW_MONITOR))
.unref = iwl_trans_pcie_unref, \
.dump_data = iwl_trans_pcie_dump_data, \
.d3_suspend = iwl_trans_pcie_d3_suspend, \
- .d3_resume = iwl_trans_pcie_d3_resume
+ .d3_resume = iwl_trans_pcie_d3_resume, \
+ .sync_nmi = iwl_trans_pcie_sync_nmi
#ifdef CONFIG_PM_SLEEP
#define IWL_TRANS_PM_OPS \
} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF)) {
trans->cfg = &iwlax210_2ax_cfg_so_gf_a0;
+ } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
+ CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF4)) {
+ trans->cfg = &iwlax210_2ax_cfg_so_gf4_a0;
}
} else if (cfg == &iwl_ax101_cfg_qu_hr) {
if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
+ CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
+ trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) {
+ trans->cfg = &iwl22000_2ax_cfg_qnj_hr_b0;
+ } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) {
trans->cfg = &iwl_ax101_cfg_qu_hr;
} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
}
} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
- (trans->cfg != &iwl22260_2ax_cfg ||
+ (trans->cfg != &iwl_ax200_cfg_cc ||
trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) {
u32 hw_status;
return ERR_PTR(ret);
}
-void iwl_trans_sync_nmi(struct iwl_trans *trans)
+void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans)
{
unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT;
cmd_str);
ret = -ETIMEDOUT;
- iwl_trans_sync_nmi(trans);
+ iwl_trans_pcie_sync_nmi(trans);
goto cancel;
}
slots_num = max_t(u32, TFD_CMD_SLOTS,
trans->cfg->min_txq_size);
else
- slots_num = TFD_TX_CMD_SLOTS;
+ slots_num = max_t(u32, TFD_TX_CMD_SLOTS,
+ trans->cfg->min_256_ba_txq_size);
trans_pcie->txq[txq_id] = &trans_pcie->txq_memory[txq_id];
ret = iwl_pcie_txq_alloc(trans, trans_pcie->txq[txq_id],
slots_num, cmd_queue);
slots_num = max_t(u32, TFD_CMD_SLOTS,
trans->cfg->min_txq_size);
else
- slots_num = TFD_TX_CMD_SLOTS;
+ slots_num = max_t(u32, TFD_TX_CMD_SLOTS,
+ trans->cfg->min_256_ba_txq_size);
ret = iwl_pcie_txq_init(trans, trans_pcie->txq[txq_id],
slots_num, cmd_queue);
if (ret) {
iwl_get_cmd_string(trans, cmd->id));
ret = -ETIMEDOUT;
- iwl_trans_sync_nmi(trans);
+ iwl_trans_pcie_sync_nmi(trans);
goto cancel;
}
enum nl80211_band band;
const struct ieee80211_ops *ops = &mac80211_hwsim_ops;
struct net *net;
- int idx;
+ int idx, i;
int n_limits = 0;
if (WARN_ON(param->channels > 1 && !param->use_chanctx))
goto failed_hw;
}
+ data->if_combination.max_interfaces = 0;
+ for (i = 0; i < n_limits; i++)
+ data->if_combination.max_interfaces +=
+ data->if_limits[i].max;
+
data->if_combination.n_limits = n_limits;
- data->if_combination.max_interfaces = 2048;
data->if_combination.limits = data->if_limits;
- hw->wiphy->iface_combinations = &data->if_combination;
- hw->wiphy->n_iface_combinations = 1;
+ /*
+ * If we actually were asked to support combinations,
+ * advertise them - if there's only a single thing like
+ * only IBSS then don't advertise it as combinations.
+ */
+ if (data->if_combination.max_interfaces > 1) {
+ hw->wiphy->iface_combinations = &data->if_combination;
+ hw->wiphy->n_iface_combinations = 1;
+ }
if (param->ciphers) {
memcpy(data->ciphers, param->ciphers,
if (mwifiex_send_cmd(priv, 0, 0, 0, hostcmd, true)) {
dev_err(priv->adapter->dev, "Failed to process hostcmd\n");
+ kfree(hostcmd);
return -EFAULT;
}
/* process hostcmd response*/
skb = cfg80211_testmode_alloc_reply_skb(wiphy, hostcmd->len);
- if (!skb)
+ if (!skb) {
+ kfree(hostcmd);
return -ENOMEM;
+ }
err = nla_put(skb, MWIFIEX_TM_ATTR_DATA,
hostcmd->len, hostcmd->cmd);
if (err) {
+ kfree(hostcmd);
kfree_skb(skb);
return -EMSGSIZE;
}
sleep_cfm_tmp =
dev_alloc_skb(sizeof(struct mwifiex_opt_sleep_confirm)
+ MWIFIEX_TYPE_LEN);
+ if (!sleep_cfm_tmp) {
+ mwifiex_dbg(adapter, ERROR,
+ "SLEEP_CFM: dev_alloc_skb failed\n");
+ return -ENOMEM;
+ }
+
skb_put(sleep_cfm_tmp, sizeof(struct mwifiex_opt_sleep_confirm)
+ MWIFIEX_TYPE_LEN);
put_unaligned_le32(MWIFIEX_USB_TYPE_CMD, sleep_cfm_tmp->data);
local_rx_pd->nf);
}
} else {
- if (rx_pkt_type != PKT_TYPE_BAR)
+ if (rx_pkt_type != PKT_TYPE_BAR &&
+ local_rx_pd->priority < MAX_NUM_TID)
priv->rx_seq[local_rx_pd->priority] = seq_num;
memcpy(ta, priv->curr_bss_params.bss_descriptor.mac_address,
ETH_ALEN);
bus_ops->rmw = mt7603_rmw;
dev->mt76.bus = bus_ops;
+ spin_lock_init(&dev->ps_lock);
+
INIT_DELAYED_WORK(&dev->mac_work, mt7603_mac_work);
tasklet_init(&dev->pre_tbtt_tasklet, mt7603_pre_tbtt_tasklet,
(unsigned long)dev);
MT_BA_CONTROL_1_RESET));
}
-void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn,
+void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid,
int ba_size)
{
u32 addr = mt7603_wtbl2_addr(wcid);
mt76_clear(dev, addr + (15 * 4), tid_mask);
return;
}
- mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000);
-
- mt7603_mac_stop(dev);
- switch (tid) {
- case 0:
- mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID0_SN, ssn);
- break;
- case 1:
- mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID1_SN, ssn);
- break;
- case 2:
- mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID2_SN_LO,
- ssn);
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID2_SN_HI,
- ssn >> 8);
- break;
- case 3:
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID3_SN, ssn);
- break;
- case 4:
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID4_SN, ssn);
- break;
- case 5:
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID5_SN_LO,
- ssn);
- mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID5_SN_HI,
- ssn >> 4);
- break;
- case 6:
- mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID6_SN, ssn);
- break;
- case 7:
- mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID7_SN, ssn);
- break;
- }
- mt7603_wtbl_update(dev, wcid, MT_WTBL_UPDATE_WTBL2);
- mt7603_mac_start(dev);
for (i = 7; i > 0; i--) {
if (ba_size >= MT_AGG_SIZE_LIMIT(i))
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_tx_rate *rate = &info->control.rates[0];
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data;
struct ieee80211_vif *vif = info->control.vif;
struct mt7603_vif *mvif;
int wlan_idx;
int tx_count = 8;
u8 frame_type, frame_subtype;
u16 fc = le16_to_cpu(hdr->frame_control);
+ u16 seqno = 0;
u8 vif_idx = 0;
u32 val;
u8 bw;
tx_count = 0x1f;
val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count) |
- FIELD_PREP(MT_TXD3_SEQ, le16_to_cpu(hdr->seq_ctrl));
+ MT_TXD3_SN_VALID;
+
+ if (ieee80211_is_data_qos(hdr->frame_control))
+ seqno = le16_to_cpu(hdr->seq_ctrl);
+ else if (ieee80211_is_back_req(hdr->frame_control))
+ seqno = le16_to_cpu(bar->start_seq_num);
+ else
+ val &= ~MT_TXD3_SN_VALID;
+
+ val |= FIELD_PREP(MT_TXD3_SEQ, seqno >> 4);
+
txwi[3] = cpu_to_le32(val);
if (key) {
struct mt7603_sta *msta = (struct mt7603_sta *)sta->drv_priv;
struct sk_buff_head list;
- mt76_stop_tx_queues(&dev->mt76, sta, false);
+ mt76_stop_tx_queues(&dev->mt76, sta, true);
mt7603_wtbl_set_ps(dev, msta, ps);
if (ps)
return;
case IEEE80211_AMPDU_TX_OPERATIONAL:
mtxq->aggr = true;
mtxq->send_bar = false;
- mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, ba_size);
+ mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, ba_size);
break;
case IEEE80211_AMPDU_TX_STOP_FLUSH:
case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
mtxq->aggr = false;
ieee80211_send_bar(vif, sta->addr, tid, mtxq->agg_ssn);
- mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1);
+ mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1);
break;
case IEEE80211_AMPDU_TX_START:
mtxq->agg_ssn = *ssn << 4;
break;
case IEEE80211_AMPDU_TX_STOP_CONT:
mtxq->aggr = false;
- mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1);
+ mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1);
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
break;
}
int mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb);
void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data);
void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid);
-void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn,
+void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid,
int ba_size);
void mt7603_pse_client_reset(struct mt7603_dev *dev);
return;
rcu_read_lock();
- mt76_tx_status_lock(mdev, &list);
if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid))
wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]);
drv_priv);
}
+ mt76_tx_status_lock(mdev, &list);
+
if (wcid) {
if (stat->pktid >= MT_PACKET_ID_FIRST)
status.skb = mt76_tx_status_skb_get(mdev, wcid,
if (*update == 0 && stat_val == stat_cache &&
stat->wcid == msta->status.wcid && msta->n_frames < 32) {
msta->n_frames++;
- goto out;
+ mt76_tx_status_unlock(mdev, &list);
+ rcu_read_unlock();
+ return;
}
mt76x02_mac_fill_tx_status(dev, status.info, &msta->status,
if (status.skb)
mt76_tx_status_skb_done(mdev, status.skb, &list);
- else
- ieee80211_tx_status_ext(mt76_hw(dev), &status);
-
-out:
mt76_tx_status_unlock(mdev, &list);
+
+ if (!status.skb)
+ ieee80211_tx_status_ext(mt76_hw(dev), &status);
rcu_read_unlock();
}
#define QTNF_MAX_MAC 3
enum qtnf_fw_state {
- QTNF_FW_STATE_RESET,
- QTNF_FW_STATE_FW_DNLD_DONE,
+ QTNF_FW_STATE_DETACHED, /* now the first enumerator, i.e. value 0 */
QTNF_FW_STATE_BOOT_DONE,
QTNF_FW_STATE_ACTIVE,
- QTNF_FW_STATE_DETACHED,
- QTNF_FW_STATE_EP_DEAD,
+ QTNF_FW_STATE_RUNNING, /* counted as "up" by qtnf_fw_is_up() */
+ QTNF_FW_STATE_DEAD, /* "attached" per qtnf_fw_is_attached(), but not "up" */
};
struct qtnf_bus;
struct napi_struct mux_napi;
struct net_device mux_dev;
struct workqueue_struct *workqueue;
+ struct workqueue_struct *hprio_workqueue;
struct work_struct fw_work;
struct work_struct event_work;
struct mutex bus_lock; /* lock during command/event processing */
char bus_priv[0] __aligned(sizeof(void *));
};
+static inline bool qtnf_fw_is_up(struct qtnf_bus *bus)
+{ /* true when bus->fw_state is ACTIVE or RUNNING, false for every other state */
+ enum qtnf_fw_state state = bus->fw_state; /* local copy used by both comparisons */
+
+ return ((state == QTNF_FW_STATE_ACTIVE) ||
+ (state == QTNF_FW_STATE_RUNNING));
+}
+
+static inline bool qtnf_fw_is_attached(struct qtnf_bus *bus)
+{ /* true when bus->fw_state is ACTIVE, RUNNING or DEAD */
+ enum qtnf_fw_state state = bus->fw_state; /* local copy used by all three comparisons */
+
+ return ((state == QTNF_FW_STATE_ACTIVE) ||
+ (state == QTNF_FW_STATE_RUNNING) ||
+ (state == QTNF_FW_STATE_DEAD)); /* DEAD is the one state accepted here but not by qtnf_fw_is_up() */
+}
+
static inline void *get_bus_priv(struct qtnf_bus *bus)
{
if (WARN(!bus, "qtnfmac: invalid bus pointer"))
{
struct net_device *netdev = wdev->netdev;
struct qtnf_vif *vif;
+ struct sk_buff *skb;
if (WARN_ON(!netdev))
return -EFAULT;
if (netif_carrier_ok(netdev))
netif_carrier_off(netdev);
+ while ((skb = skb_dequeue(&vif->high_pri_tx_queue)))
+ dev_kfree_skb_any(skb);
+
+ cancel_work_sync(&vif->high_pri_tx_work);
+
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdevice(netdev);
*cookie = short_cookie;
if (params->offchan)
- flags |= QLINK_MGMT_FRAME_TX_FLAG_OFFCHAN;
+ flags |= QLINK_FRAME_TX_FLAG_OFFCHAN;
if (params->no_cck)
- flags |= QLINK_MGMT_FRAME_TX_FLAG_NO_CCK;
+ flags |= QLINK_FRAME_TX_FLAG_NO_CCK;
if (params->dont_wait_for_ack)
- flags |= QLINK_MGMT_FRAME_TX_FLAG_ACK_NOWAIT;
+ flags |= QLINK_FRAME_TX_FLAG_ACK_NOWAIT;
/* If channel is not specified, pass "freq = 0" to tell device
* firmware to use current channel.
le16_to_cpu(mgmt_frame->frame_control), mgmt_frame->da,
params->len, short_cookie, flags);
- return qtnf_cmd_send_mgmt_frame(vif, short_cookie, flags,
- freq,
- params->buf, params->len);
+ return qtnf_cmd_send_frame(vif, short_cookie, flags,
+ freq, params->buf, params->len);
}
static int
#endif
};
-static void qtnf_cfg80211_reg_notifier(struct wiphy *wiphy_in,
+static void qtnf_cfg80211_reg_notifier(struct wiphy *wiphy, /* wiphy->reg_notifier callback */
struct regulatory_request *req)
{
- struct qtnf_wmac *mac = wiphy_priv(wiphy_in);
- struct qtnf_bus *bus = mac->bus;
- struct wiphy *wiphy;
- unsigned int mac_idx;
+ struct qtnf_wmac *mac = wiphy_priv(wiphy); /* the MAC that owns the notified wiphy */
enum nl80211_band band;
int ret;
pr_debug("MAC%u: initiator=%d alpha=%c%c\n", mac->macid, req->initiator,
req->alpha2[0], req->alpha2[1]);
- ret = qtnf_cmd_reg_notify(bus, req);
+ ret = qtnf_cmd_reg_notify(mac, req); /* request is now sent for this MAC, not bus-wide */
if (ret) {
- if (ret == -EOPNOTSUPP) {
- pr_warn("reg update not supported\n");
- } else if (ret == -EALREADY) {
- pr_info("regulatory domain is already set to %c%c",
- req->alpha2[0], req->alpha2[1]);
- } else {
- pr_err("failed to update reg domain to %c%c\n",
- req->alpha2[0], req->alpha2[1]);
- }
-
+ pr_err("MAC%u: failed to update region to %c%c: %d\n",
+ mac->macid, req->alpha2[0], req->alpha2[1], ret); /* every error code is logged the same way */
return;
}
- for (mac_idx = 0; mac_idx < QTNF_MAX_MAC; ++mac_idx) {
- if (!(bus->hw_info.mac_bitmap & (1 << mac_idx)))
+ for (band = 0; band < NUM_NL80211_BANDS; ++band) { /* on success, re-query each band this wiphy has */
+ if (!wiphy->bands[band])
continue;
- mac = bus->mac[mac_idx];
- if (!mac)
- continue;
-
- wiphy = priv_to_wiphy(mac);
-
- for (band = 0; band < NUM_NL80211_BANDS; ++band) {
- if (!wiphy->bands[band])
- continue;
-
- ret = qtnf_cmd_band_info_get(mac, wiphy->bands[band]);
- if (ret)
- pr_err("failed to get chan info for mac %u band %u\n",
- mac_idx, band);
- }
+ ret = qtnf_cmd_band_info_get(mac, wiphy->bands[band]);
+ if (ret) /* a failing band is logged; the loop continues with the next one */
+ pr_err("MAC%u: failed to update band %u\n",
+ mac->macid, band);
}
}
struct wiphy *wiphy = priv_to_wiphy(mac);
struct qtnf_mac_info *macinfo = &mac->macinfo;
int ret;
+ bool regdomain_is_known;
if (!wiphy) {
pr_err("invalid wiphy pointer\n");
WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD |
WIPHY_FLAG_AP_UAPSD |
WIPHY_FLAG_HAS_CHANNEL_SWITCH |
- WIPHY_FLAG_4ADDR_STATION;
+ WIPHY_FLAG_4ADDR_STATION |
+ WIPHY_FLAG_NETNS_OK;
wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
if (hw_info->hw_capab & QLINK_HW_CAPAB_DFS_OFFLOAD)
wiphy->wowlan = macinfo->wowlan;
#endif
+ regdomain_is_known = isalpha(mac->rd->alpha2[0]) &&
+ isalpha(mac->rd->alpha2[1]);
+
if (hw_info->hw_capab & QLINK_HW_CAPAB_REG_UPDATE) {
- wiphy->regulatory_flags |= REGULATORY_STRICT_REG |
- REGULATORY_CUSTOM_REG;
wiphy->reg_notifier = qtnf_cfg80211_reg_notifier;
- wiphy_apply_custom_regulatory(wiphy, hw_info->rd);
+
+ if (mac->rd->alpha2[0] == '9' && mac->rd->alpha2[1] == '9') {
+ wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG |
+ REGULATORY_STRICT_REG;
+ wiphy_apply_custom_regulatory(wiphy, mac->rd);
+ } else if (regdomain_is_known) {
+ wiphy->regulatory_flags |= REGULATORY_STRICT_REG;
+ }
} else {
wiphy->regulatory_flags |= REGULATORY_WIPHY_SELF_MANAGED;
}
goto out;
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
- ret = regulatory_set_wiphy_regd(wiphy, hw_info->rd);
- else if (isalpha(hw_info->rd->alpha2[0]) &&
- isalpha(hw_info->rd->alpha2[1]))
- ret = regulatory_hint(wiphy, hw_info->rd->alpha2);
+ ret = regulatory_set_wiphy_regd(wiphy, mac->rd);
+ else if (regdomain_is_known)
+ ret = regulatory_hint(wiphy, mac->rd->alpha2);
out:
return ret;
#include "bus.h"
#include "commands.h"
+#define QTNF_SCAN_TIME_AUTO 0
+
+/* Let device itself to select best values for current conditions */
+#define QTNF_SCAN_DWELL_ACTIVE_DEFAULT QTNF_SCAN_TIME_AUTO
+#define QTNF_SCAN_DWELL_PASSIVE_DEFAULT QTNF_SCAN_TIME_AUTO
+#define QTNF_SCAN_SAMPLE_DURATION_DEFAULT QTNF_SCAN_TIME_AUTO
+
static int qtnf_cmd_check_reply_header(const struct qlink_resp *resp,
u16 cmd_id, u8 mac_id, u8 vif_id,
size_t resp_size)
pr_debug("VIF%u.%u cmd=0x%.4X\n", mac_id, vif_id, cmd_id);
- if (bus->fw_state != QTNF_FW_STATE_ACTIVE &&
- cmd_id != QLINK_CMD_FW_INIT) {
+ if (!qtnf_fw_is_up(bus) && cmd_id != QLINK_CMD_FW_INIT) {
pr_warn("VIF%u.%u: drop cmd 0x%.4X in fw state %d\n",
mac_id, vif_id, cmd_id, bus->fw_state);
dev_kfree_skb(cmd_skb);
memcpy(tlv->ie_data, buf, len);
}
-static inline size_t qtnf_cmd_acl_data_size(const struct cfg80211_acl_data *acl)
-{
- size_t size = sizeof(struct qlink_acl_data) +
- acl->n_acl_entries * sizeof(struct qlink_mac_address);
-
- return size;
-}
-
static bool qtnf_cmd_start_ap_can_fit(const struct qtnf_vif *vif,
const struct cfg80211_ap_settings *s)
{
if (s->acl)
len += sizeof(struct qlink_tlv_hdr) +
- qtnf_cmd_acl_data_size(s->acl);
+ struct_size(s->acl, mac_addrs, s->acl->n_acl_entries);
if (len > (sizeof(struct qlink_cmd) + QTNF_MAX_CMD_BUF_SIZE)) {
pr_err("VIF%u.%u: can not fit AP settings: %u\n",
}
if (s->acl) {
- size_t acl_size = qtnf_cmd_acl_data_size(s->acl);
+ size_t acl_size = struct_size(s->acl, mac_addrs,
+ s->acl->n_acl_entries);
struct qlink_tlv_hdr *tlv =
skb_put(cmd_skb, sizeof(*tlv) + acl_size);
return ret;
}
-int qtnf_cmd_send_mgmt_frame(struct qtnf_vif *vif, u32 cookie, u16 flags,
- u16 freq, const u8 *buf, size_t len)
+int qtnf_cmd_send_frame(struct qtnf_vif *vif, u32 cookie, u16 flags,
+ u16 freq, const u8 *buf, size_t len)
{
struct sk_buff *cmd_skb;
- struct qlink_cmd_mgmt_frame_tx *cmd;
+ struct qlink_cmd_frame_tx *cmd;
int ret;
if (sizeof(*cmd) + len > QTNF_MAX_CMD_BUF_SIZE) {
}
cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid,
- QLINK_CMD_SEND_MGMT_FRAME,
+ QLINK_CMD_SEND_FRAME,
sizeof(*cmd));
if (!cmd_skb)
return -ENOMEM;
qtnf_bus_lock(vif->mac->bus);
- cmd = (struct qlink_cmd_mgmt_frame_tx *)cmd_skb->data;
+ cmd = (struct qlink_cmd_frame_tx *)cmd_skb->data;
cmd->cookie = cpu_to_le32(cookie);
cmd->freq = cpu_to_le16(freq);
cmd->flags = cpu_to_le16(flags);
int use4addr,
u8 *mac_addr)
{
- return qtnf_cmd_send_add_change_intf(vif, iftype, use4addr, mac_addr,
- QLINK_CMD_CHANGE_INTF);
+ int ret;
+
+ ret = qtnf_cmd_send_add_change_intf(vif, iftype, use4addr, mac_addr,
+ QLINK_CMD_CHANGE_INTF);
+
+ /* Regulatory settings may be different for different interface types */
+ if (ret == 0 && vif->wdev.iftype != iftype) {
+ enum nl80211_band band;
+ struct wiphy *wiphy = priv_to_wiphy(vif->mac);
+
+ for (band = 0; band < NUM_NL80211_BANDS; ++band) {
+ if (!wiphy->bands[band])
+ continue;
+
+ qtnf_cmd_band_info_get(vif->mac, wiphy->bands[band]);
+ }
+ }
+
+ return ret;
}
int qtnf_cmd_send_del_intf(struct qtnf_vif *vif)
return ret;
}
-static u32 qtnf_cmd_resp_reg_rule_flags_parse(u32 qflags)
-{
- u32 flags = 0;
-
- if (qflags & QLINK_RRF_NO_OFDM)
- flags |= NL80211_RRF_NO_OFDM;
-
- if (qflags & QLINK_RRF_NO_CCK)
- flags |= NL80211_RRF_NO_CCK;
-
- if (qflags & QLINK_RRF_NO_INDOOR)
- flags |= NL80211_RRF_NO_INDOOR;
-
- if (qflags & QLINK_RRF_NO_OUTDOOR)
- flags |= NL80211_RRF_NO_OUTDOOR;
-
- if (qflags & QLINK_RRF_DFS)
- flags |= NL80211_RRF_DFS;
-
- if (qflags & QLINK_RRF_PTP_ONLY)
- flags |= NL80211_RRF_PTP_ONLY;
-
- if (qflags & QLINK_RRF_PTMP_ONLY)
- flags |= NL80211_RRF_PTMP_ONLY;
-
- if (qflags & QLINK_RRF_NO_IR)
- flags |= NL80211_RRF_NO_IR;
-
- if (qflags & QLINK_RRF_AUTO_BW)
- flags |= NL80211_RRF_AUTO_BW;
-
- if (qflags & QLINK_RRF_IR_CONCURRENT)
- flags |= NL80211_RRF_IR_CONCURRENT;
-
- if (qflags & QLINK_RRF_NO_HT40MINUS)
- flags |= NL80211_RRF_NO_HT40MINUS;
-
- if (qflags & QLINK_RRF_NO_HT40PLUS)
- flags |= NL80211_RRF_NO_HT40PLUS;
-
- if (qflags & QLINK_RRF_NO_80MHZ)
- flags |= NL80211_RRF_NO_80MHZ;
-
- if (qflags & QLINK_RRF_NO_160MHZ)
- flags |= NL80211_RRF_NO_160MHZ;
-
- return flags;
-}
-
static int
qtnf_cmd_resp_proc_hw_info(struct qtnf_bus *bus,
const struct qlink_resp_get_hw_info *resp,
{
struct qtnf_hw_info *hwinfo = &bus->hw_info;
const struct qlink_tlv_hdr *tlv;
- const struct qlink_tlv_reg_rule *tlv_rule;
const char *bld_name = NULL;
const char *bld_rev = NULL;
const char *bld_type = NULL;
const char *calibration_ver = NULL;
const char *uboot_ver = NULL;
u32 hw_ver = 0;
- struct ieee80211_reg_rule *rule;
u16 tlv_type;
u16 tlv_value_len;
- unsigned int rule_idx = 0;
-
- if (WARN_ON(resp->n_reg_rules > NL80211_MAX_SUPP_REG_RULES))
- return -E2BIG;
-
- hwinfo->rd = kzalloc(struct_size(hwinfo->rd, reg_rules,
- resp->n_reg_rules), GFP_KERNEL);
-
- if (!hwinfo->rd)
- return -ENOMEM;
hwinfo->num_mac = resp->num_mac;
hwinfo->mac_bitmap = resp->mac_bitmap;
hwinfo->total_tx_chain = resp->total_tx_chain;
hwinfo->total_rx_chain = resp->total_rx_chain;
hwinfo->hw_capab = le32_to_cpu(resp->hw_capab);
- hwinfo->rd->n_reg_rules = resp->n_reg_rules;
- hwinfo->rd->alpha2[0] = resp->alpha2[0];
- hwinfo->rd->alpha2[1] = resp->alpha2[1];
bld_tmstamp = le32_to_cpu(resp->bld_tmstamp);
plat_id = le32_to_cpu(resp->plat_id);
hw_ver = le32_to_cpu(resp->hw_ver);
- switch (resp->dfs_region) {
- case QLINK_DFS_FCC:
- hwinfo->rd->dfs_region = NL80211_DFS_FCC;
- break;
- case QLINK_DFS_ETSI:
- hwinfo->rd->dfs_region = NL80211_DFS_ETSI;
- break;
- case QLINK_DFS_JP:
- hwinfo->rd->dfs_region = NL80211_DFS_JP;
- break;
- case QLINK_DFS_UNSET:
- default:
- hwinfo->rd->dfs_region = NL80211_DFS_UNSET;
- break;
- }
-
tlv = (const struct qlink_tlv_hdr *)resp->info;
while (info_len >= sizeof(*tlv)) {
}
switch (tlv_type) {
- case QTN_TLV_ID_REG_RULE:
- if (rule_idx >= resp->n_reg_rules) {
- pr_warn("unexpected number of rules: %u\n",
- resp->n_reg_rules);
- return -EINVAL;
- }
-
- if (tlv_value_len != sizeof(*tlv_rule) - sizeof(*tlv)) {
- pr_warn("malformed TLV 0x%.2X; LEN: %u\n",
- tlv_type, tlv_value_len);
- return -EINVAL;
- }
-
- tlv_rule = (const struct qlink_tlv_reg_rule *)tlv;
- rule = &hwinfo->rd->reg_rules[rule_idx++];
-
- rule->freq_range.start_freq_khz =
- le32_to_cpu(tlv_rule->start_freq_khz);
- rule->freq_range.end_freq_khz =
- le32_to_cpu(tlv_rule->end_freq_khz);
- rule->freq_range.max_bandwidth_khz =
- le32_to_cpu(tlv_rule->max_bandwidth_khz);
- rule->power_rule.max_antenna_gain =
- le32_to_cpu(tlv_rule->max_antenna_gain);
- rule->power_rule.max_eirp =
- le32_to_cpu(tlv_rule->max_eirp);
- rule->dfs_cac_ms =
- le32_to_cpu(tlv_rule->dfs_cac_ms);
- rule->flags = qtnf_cmd_resp_reg_rule_flags_parse(
- le32_to_cpu(tlv_rule->flags));
- break;
case QTN_TLV_ID_BUILD_NAME:
bld_name = (const void *)tlv->val;
break;
tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len);
}
- if (rule_idx != resp->n_reg_rules) {
- pr_warn("unexpected number of rules: expected %u got %u\n",
- resp->n_reg_rules, rule_idx);
- kfree(hwinfo->rd);
- hwinfo->rd = NULL;
- return -EINVAL;
- }
-
- pr_info("fw_version=%d, MACs map %#x, alpha2=\"%c%c\", chains Tx=%u Rx=%u, capab=0x%x\n",
+ pr_info("fw_version=%d, MACs map %#x, chains Tx=%u Rx=%u, capab=0x%x\n",
hwinfo->fw_ver, hwinfo->mac_bitmap,
- hwinfo->rd->alpha2[0], hwinfo->rd->alpha2[1],
hwinfo->total_tx_chain, hwinfo->total_rx_chain,
hwinfo->hw_capab);
"\nHardware ID: %s" \
"\nCalibration version: %s" \
"\nU-Boot version: %s" \
- "\nHardware version: 0x%08x",
+ "\nHardware version: 0x%08x\n",
bld_name, bld_rev, bld_type, bld_label,
(unsigned long)bld_tmstamp,
(unsigned long)plat_id,
}
}
-static int qtnf_parse_variable_mac_info(struct qtnf_wmac *mac,
- const u8 *tlv_buf, size_t tlv_buf_size)
+static int
+qtnf_parse_variable_mac_info(struct qtnf_wmac *mac,
+ const struct qlink_resp_get_mac_info *resp,
+ size_t tlv_buf_size)
{
+ const u8 *tlv_buf = resp->var_info;
struct ieee80211_iface_combination *comb = NULL;
size_t n_comb = 0;
struct ieee80211_iface_limit *limits;
u8 ext_capa_len = 0;
u8 ext_capa_mask_len = 0;
int i = 0;
+ struct ieee80211_reg_rule *rule;
+ unsigned int rule_idx = 0;
+ const struct qlink_tlv_reg_rule *tlv_rule;
+
+ if (WARN_ON(resp->n_reg_rules > NL80211_MAX_SUPP_REG_RULES))
+ return -E2BIG;
+
+ mac->rd = kzalloc(sizeof(*mac->rd) +
+ sizeof(struct ieee80211_reg_rule) *
+ resp->n_reg_rules, GFP_KERNEL);
+ if (!mac->rd)
+ return -ENOMEM;
+
+ mac->rd->n_reg_rules = resp->n_reg_rules;
+ mac->rd->alpha2[0] = resp->alpha2[0];
+ mac->rd->alpha2[1] = resp->alpha2[1];
+
+ switch (resp->dfs_region) {
+ case QLINK_DFS_FCC:
+ mac->rd->dfs_region = NL80211_DFS_FCC;
+ break;
+ case QLINK_DFS_ETSI:
+ mac->rd->dfs_region = NL80211_DFS_ETSI;
+ break;
+ case QLINK_DFS_JP:
+ mac->rd->dfs_region = NL80211_DFS_JP;
+ break;
+ case QLINK_DFS_UNSET:
+ default:
+ mac->rd->dfs_region = NL80211_DFS_UNSET;
+ break;
+ }
tlv = (const struct qlink_tlv_hdr *)tlv_buf;
while (tlv_buf_size >= sizeof(struct qlink_tlv_hdr)) {
mac->macinfo.wowlan = NULL;
qtnf_parse_wowlan_info(mac, wowlan);
break;
+ case QTN_TLV_ID_REG_RULE:
+ if (rule_idx >= resp->n_reg_rules) {
+ pr_warn("unexpected number of rules: %u\n",
+ resp->n_reg_rules);
+ return -EINVAL;
+ }
+
+ if (tlv_value_len != sizeof(*tlv_rule) - sizeof(*tlv)) {
+ pr_warn("malformed TLV 0x%.2X; LEN: %u\n",
+ tlv_type, tlv_value_len);
+ return -EINVAL;
+ }
+
+ tlv_rule = (const struct qlink_tlv_reg_rule *)tlv;
+ rule = &mac->rd->reg_rules[rule_idx++];
+ qlink_utils_regrule_q2nl(rule, tlv_rule);
+ break;
default:
pr_warn("MAC%u: unknown TLV type %u\n",
mac->macid, tlv_type);
return -EINVAL;
}
+ if (rule_idx != resp->n_reg_rules) {
+ pr_warn("unexpected number of rules: expected %u got %u\n",
+ resp->n_reg_rules, rule_idx);
+ return -EINVAL;
+ }
+
if (ext_capa_len > 0) {
ext_capa = kmemdup(ext_capa, ext_capa_len, GFP_KERNEL);
if (!ext_capa)
resp = (const struct qlink_resp_get_mac_info *)resp_skb->data;
qtnf_cmd_resp_proc_mac_info(mac, resp);
- ret = qtnf_parse_variable_mac_info(mac, resp->var_info, var_data_len);
+ ret = qtnf_parse_variable_mac_info(mac, resp, var_data_len);
out:
qtnf_bus_unlock(mac->bus);
struct qlink_resp_band_info_get *resp;
size_t info_len = 0;
int ret = 0;
- u8 qband;
-
- switch (band->band) {
- case NL80211_BAND_2GHZ:
- qband = QLINK_BAND_2GHZ;
- break;
- case NL80211_BAND_5GHZ:
- qband = QLINK_BAND_5GHZ;
- break;
- case NL80211_BAND_60GHZ:
- qband = QLINK_BAND_60GHZ;
- break;
- default:
- return -EINVAL;
- }
+ u8 qband = qlink_utils_band_cfg2q(band->band);
cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0,
QLINK_CMD_BAND_INFO_GET,
static void qtnf_cmd_channel_tlv_add(struct sk_buff *cmd_skb,
const struct ieee80211_channel *sc)
{
- struct qlink_tlv_channel *qchan;
- u32 flags = 0;
-
- qchan = skb_put_zero(cmd_skb, sizeof(*qchan));
- qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL);
- qchan->hdr.len = cpu_to_le16(sizeof(*qchan) - sizeof(qchan->hdr));
- qchan->chan.center_freq = cpu_to_le16(sc->center_freq);
- qchan->chan.hw_value = cpu_to_le16(sc->hw_value);
-
- if (sc->flags & IEEE80211_CHAN_NO_IR)
- flags |= QLINK_CHAN_NO_IR;
-
- if (sc->flags & IEEE80211_CHAN_RADAR)
- flags |= QLINK_CHAN_RADAR;
-
- qchan->chan.flags = cpu_to_le32(flags);
+ struct qlink_tlv_channel *tlv;
+ struct qlink_channel *qch;
+
+ tlv = skb_put_zero(cmd_skb, sizeof(*tlv)); /* append one zero-filled channel TLV to cmd_skb */
+ qch = &tlv->chan; /* payload part of the TLV, filled in below */
+ tlv->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL);
+ tlv->hdr.len = cpu_to_le16(sizeof(*qch)); /* length is the qlink_channel payload size, header not counted */
+
+ qch->center_freq = cpu_to_le16(sc->center_freq);
+ qch->hw_value = cpu_to_le16(sc->hw_value);
+ qch->band = qlink_utils_band_cfg2q(sc->band);
+ qch->max_power = sc->max_power; /* stored without endian conversion, as are the fields below - presumably 8-bit, TODO confirm */
+ qch->max_reg_power = sc->max_reg_power;
+ qch->max_antenna_gain = sc->max_antenna_gain;
+ qch->beacon_found = sc->beacon_found;
+ qch->dfs_state = qlink_utils_dfs_state_cfg2q(sc->dfs_state);
+ qch->flags = cpu_to_le32(qlink_utils_chflags_cfg2q(sc->flags)); /* flag translation moved from the inline NO_IR/RADAR tests to a helper */
}
static void qtnf_cmd_randmac_tlv_add(struct sk_buff *cmd_skb,
memcpy(randmac->mac_addr_mask, mac_addr_mask, ETH_ALEN);
}
+static void qtnf_cmd_scan_set_dwell(struct qtnf_wmac *mac,
+ struct sk_buff *cmd_skb)
+{ /* append active-dwell, passive-dwell and sample-duration TLVs for mac->scan_req to cmd_skb */
+ struct cfg80211_scan_request *scan_req = mac->scan_req;
+ u16 dwell_active = QTNF_SCAN_DWELL_ACTIVE_DEFAULT;
+ u16 dwell_passive = QTNF_SCAN_DWELL_PASSIVE_DEFAULT;
+ u16 duration = QTNF_SCAN_SAMPLE_DURATION_DEFAULT; /* never reassigned below: always sent as the default */
+
+ if (scan_req->duration) { /* a non-zero requested duration replaces both dwell defaults */
+ dwell_active = scan_req->duration;
+ dwell_passive = scan_req->duration;
+ }
+
+ pr_debug("MAC%u: %s scan dwell active=%u, passive=%u, duration=%u\n",
+ mac->macid,
+ scan_req->duration_mandatory ? "mandatory" : "max", /* in this function the flag only changes the log text */
+ dwell_active, dwell_passive, duration);
+
+ qtnf_cmd_skb_put_tlv_u16(cmd_skb,
+ QTN_TLV_ID_SCAN_DWELL_ACTIVE,
+ dwell_active); /* all three TLVs are added unconditionally, defaults included */
+ qtnf_cmd_skb_put_tlv_u16(cmd_skb,
+ QTN_TLV_ID_SCAN_DWELL_PASSIVE,
+ dwell_passive);
+ qtnf_cmd_skb_put_tlv_u16(cmd_skb,
+ QTN_TLV_ID_SCAN_SAMPLE_DURATION,
+ duration);
+}
+
int qtnf_cmd_send_scan(struct qtnf_wmac *mac)
{
struct sk_buff *cmd_skb;
}
}
+ qtnf_cmd_scan_set_dwell(mac, cmd_skb);
+
if (scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
pr_debug("MAC%u: scan with random addr=%pM, mask=%pM\n",
mac->macid,
qtnf_cmd_skb_put_tlv_tag(cmd_skb, QTN_TLV_ID_SCAN_FLUSH);
}
- if (scan_req->duration) {
- pr_debug("MAC%u: %s scan duration %u\n", mac->macid,
- scan_req->duration_mandatory ? "mandatory" : "max",
- scan_req->duration);
-
- qtnf_cmd_skb_put_tlv_u16(cmd_skb, QTN_TLV_ID_SCAN_DWELL,
- scan_req->duration);
- }
-
ret = qtnf_cmd_send(mac->bus, cmd_skb);
if (ret)
goto out;
return ret;
}
-int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req)
+int qtnf_cmd_reg_notify(struct qtnf_wmac *mac, struct regulatory_request *req)
{
+ struct wiphy *wiphy = priv_to_wiphy(mac);
+ struct qtnf_bus *bus = mac->bus;
struct sk_buff *cmd_skb;
int ret;
struct qlink_cmd_reg_notify *cmd;
+ enum nl80211_band band;
+ const struct ieee80211_supported_band *cfg_band;
- cmd_skb = qtnf_cmd_alloc_new_cmdskb(QLINK_MACID_RSVD, QLINK_VIFID_RSVD,
+ cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, QLINK_VIFID_RSVD,
QLINK_CMD_REG_NOTIFY,
sizeof(*cmd));
if (!cmd_skb)
break;
}
+ switch (req->dfs_region) {
+ case NL80211_DFS_FCC:
+ cmd->dfs_region = QLINK_DFS_FCC;
+ break;
+ case NL80211_DFS_ETSI:
+ cmd->dfs_region = QLINK_DFS_ETSI;
+ break;
+ case NL80211_DFS_JP:
+ cmd->dfs_region = QLINK_DFS_JP;
+ break;
+ default:
+ cmd->dfs_region = QLINK_DFS_UNSET;
+ break;
+ }
+
+ cmd->num_channels = 0;
+
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ unsigned int i;
+
+ cfg_band = wiphy->bands[band];
+ if (!cfg_band)
+ continue;
+
+ cmd->num_channels += cfg_band->n_channels;
+
+ for (i = 0; i < cfg_band->n_channels; ++i) {
+ qtnf_cmd_channel_tlv_add(cmd_skb,
+ &cfg_band->channels[i]);
+ }
+ }
+
qtnf_bus_lock(bus);
ret = qtnf_cmd_send(bus, cmd_skb);
- if (ret)
- goto out;
-
-out:
qtnf_bus_unlock(bus);
return ret;
struct qtnf_bus *bus = vif->mac->bus;
struct sk_buff *cmd_skb;
struct qlink_tlv_hdr *tlv;
- size_t acl_size = qtnf_cmd_acl_data_size(params);
+ size_t acl_size = struct_size(params, mac_addrs, params->n_acl_entries);
int ret;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid,
const struct cfg80211_ap_settings *s);
int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif);
int qtnf_cmd_send_register_mgmt(struct qtnf_vif *vif, u16 frame_type, bool reg);
-int qtnf_cmd_send_mgmt_frame(struct qtnf_vif *vif, u32 cookie, u16 flags,
- u16 freq, const u8 *buf, size_t len);
+int qtnf_cmd_send_frame(struct qtnf_vif *vif, u32 cookie, u16 flags,
+ u16 freq, const u8 *buf, size_t len);
int qtnf_cmd_send_mgmt_set_appie(struct qtnf_vif *vif, u8 frame_type,
const u8 *buf, size_t len);
int qtnf_cmd_get_sta_info(struct qtnf_vif *vif, const u8 *sta_mac,
u16 reason_code);
int qtnf_cmd_send_updown_intf(struct qtnf_vif *vif,
bool up);
-int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req);
+int qtnf_cmd_reg_notify(struct qtnf_wmac *mac, struct regulatory_request *req);
int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel,
struct qtnf_chan_stats *stats);
int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif,
qtnf_mac_scan_finish(mac, true);
}
+/*
+ * Work handler draining a VIF's high-priority TX queue.
+ *
+ * Each queued skb is passed to qtnf_cmd_send_frame() with the
+ * QLINK_FRAME_TX_FLAG_8023 flag and then freed. The queue is left
+ * untouched while the VIF has no netdev or its interface type is still
+ * NL80211_IFTYPE_UNSPECIFIED.
+ */
+static void qtnf_vif_send_data_high_pri(struct work_struct *work)
+{
+	struct qtnf_vif *vif;
+	struct sk_buff *frame;
+
+	vif = container_of(work, struct qtnf_vif, high_pri_tx_work);
+
+	if (vif->netdev && vif->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) {
+		for (frame = skb_dequeue(&vif->high_pri_tx_queue);
+		     frame;
+		     frame = skb_dequeue(&vif->high_pri_tx_queue)) {
+			/* the command's return value is not checked here */
+			qtnf_cmd_send_frame(vif, 0, QLINK_FRAME_TX_FLAG_8023,
+					    0, frame->data, frame->len);
+			dev_kfree_skb_any(frame);
+		}
+	}
+}
+
static struct qtnf_wmac *qtnf_core_mac_alloc(struct qtnf_bus *bus,
unsigned int macid)
{
vif->mac = mac;
vif->vifid = i;
qtnf_sta_list_init(&vif->sta_list);
-
+ INIT_WORK(&vif->high_pri_tx_work, qtnf_vif_send_data_high_pri);
+ skb_queue_head_init(&vif->high_pri_tx_queue);
vif->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vif->stats64)
pr_warn("VIF%u.%u: per cpu stats allocation failed\n",
qtnf_mac_iface_comb_free(mac);
qtnf_mac_ext_caps_free(mac);
kfree(mac->macinfo.wowlan);
+ kfree(mac->rd);
+ mac->rd = NULL;
wiphy_free(wiphy);
bus->mac[macid] = NULL;
}
int ret;
qtnf_trans_init(bus);
-
- bus->fw_state = QTNF_FW_STATE_BOOT_DONE;
qtnf_bus_data_rx_start(bus);
bus->workqueue = alloc_ordered_workqueue("QTNF_BUS", 0);
goto error;
}
+ bus->hprio_workqueue = alloc_workqueue("QTNF_HPRI", WQ_HIGHPRI, 0);
+ if (!bus->hprio_workqueue) {
+ pr_err("failed to alloc high prio workqueue\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+
INIT_WORK(&bus->event_work, qtnf_event_work_handler);
ret = qtnf_cmd_send_init_fw(bus);
}
bus->fw_state = QTNF_FW_STATE_ACTIVE;
-
ret = qtnf_cmd_get_hw_info(bus);
if (ret) {
pr_err("failed to get HW info: %d\n", ret);
}
}
+ bus->fw_state = QTNF_FW_STATE_RUNNING;
return 0;
error:
qtnf_core_detach(bus);
-
return ret;
}
EXPORT_SYMBOL_GPL(qtnf_core_attach);
for (macid = 0; macid < QTNF_MAX_MAC; macid++)
qtnf_core_mac_detach(bus, macid);
- if (bus->fw_state == QTNF_FW_STATE_ACTIVE)
+ if (qtnf_fw_is_up(bus))
qtnf_cmd_send_deinit_fw(bus);
bus->fw_state = QTNF_FW_STATE_DETACHED;
if (bus->workqueue) {
flush_workqueue(bus->workqueue);
destroy_workqueue(bus->workqueue);
+ bus->workqueue = NULL;
}
- kfree(bus->hw_info.rd);
- bus->hw_info.rd = NULL;
+ if (bus->hprio_workqueue) {
+ flush_workqueue(bus->hprio_workqueue);
+ destroy_workqueue(bus->hprio_workqueue);
+ bus->hprio_workqueue = NULL;
+ }
qtnf_trans_free(bus);
}
struct qtnf_wmac *mac;
struct qtnf_vif *vif;
+ if (unlikely(bus->fw_state != QTNF_FW_STATE_RUNNING))
+ return NULL;
+
meta = (struct qtnf_frame_meta_info *)
(skb_tail_pointer(skb) - sizeof(*meta));
}
EXPORT_SYMBOL_GPL(qtnf_update_tx_stats);
+/*
+ * Append @skb to the high-priority TX queue of the VIF that owns
+ * skb->dev, then schedule that VIF's high_pri_tx_work on the bus
+ * high-priority workqueue.
+ */
+void qtnf_packet_send_hi_pri(struct sk_buff *skb)
+{
+	struct qtnf_vif *vif;
+	struct workqueue_struct *wq;
+
+	vif = qtnf_netdev_get_priv(skb->dev);
+	skb_queue_tail(&vif->high_pri_tx_queue, skb);
+
+	wq = vif->mac->bus->hprio_workqueue;
+	queue_work(wq, &vif->high_pri_tx_work);
+}
+EXPORT_SYMBOL_GPL(qtnf_packet_send_hi_pri);
+
MODULE_AUTHOR("Quantenna Communications");
MODULE_DESCRIPTION("Quantenna 802.11 wireless LAN FullMAC driver.");
MODULE_LICENSE("GPL");
struct qtnf_wmac *mac;
struct work_struct reset_work;
+ struct work_struct high_pri_tx_work;
+ struct sk_buff_head high_pri_tx_queue;
struct qtnf_sta_list sta_list;
unsigned long cons_tx_timeout_cnt;
int generation;
struct cfg80211_scan_request *scan_req;
struct mutex mac_lock; /* lock during wmac speicific ops */
struct delayed_work scan_timeout;
+ struct ieee80211_regdomain *rd;
};
struct qtnf_hw_info {
u8 mac_bitmap;
u32 fw_ver;
u32 hw_capab;
- struct ieee80211_regdomain *rd;
u8 total_tx_chain;
u8 total_rx_chain;
char fw_version[ETHTOOL_FWVERS_LEN];
void qtnf_netdev_updown(struct net_device *ndev, bool up);
void qtnf_scan_done(struct qtnf_wmac *mac, bool aborted);
+void qtnf_packet_send_hi_pri(struct sk_buff *skb);
static inline struct qtnf_vif *qtnf_netdev_get_priv(struct net_device *dev)
{
if (ret == -ETIMEDOUT) {
pr_err("EP firmware is dead\n");
- bus->fw_state = QTNF_FW_STATE_EP_DEAD;
+ bus->fw_state = QTNF_FW_STATE_DEAD;
}
return ret;
return 0;
}
-void qtnf_pcie_fw_boot_done(struct qtnf_bus *bus, bool boot_success)
+int qtnf_pcie_fw_boot_done(struct qtnf_bus *bus)
{
- struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
- struct pci_dev *pdev = priv->pdev;
int ret;
- if (boot_success) {
- bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
-
- ret = qtnf_core_attach(bus);
- if (ret) {
- pr_err("failed to attach core\n");
- boot_success = false;
- }
- }
-
- if (boot_success) {
+ bus->fw_state = QTNF_FW_STATE_BOOT_DONE;
+ ret = qtnf_core_attach(bus);
+ if (ret) {
+ pr_err("failed to attach core\n");
+ } else {
qtnf_debugfs_init(bus, DRV_NAME);
qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
- } else {
- bus->fw_state = QTNF_FW_STATE_DETACHED;
}
- put_device(&pdev->dev);
+ return ret;
}
static void qtnf_tune_pcie_mps(struct pci_dev *pdev)
pcie_priv = get_bus_priv(bus);
pci_set_drvdata(pdev, bus);
bus->dev = &pdev->dev;
- bus->fw_state = QTNF_FW_STATE_RESET;
+ bus->fw_state = QTNF_FW_STATE_DETACHED;
pcie_priv->pdev = pdev;
pcie_priv->tx_stopped = 0;
pcie_priv->rx_bd_num = rx_bd_size_param;
pcie_priv->pcie_irq_count = 0;
pcie_priv->tx_reclaim_done = 0;
pcie_priv->tx_reclaim_req = 0;
+ pcie_priv->tx_eapol = 0;
pcie_priv->workqueue = create_singlethread_workqueue("QTNF_PCIE");
if (!pcie_priv->workqueue) {
cancel_work_sync(&bus->fw_work);
- if (bus->fw_state == QTNF_FW_STATE_ACTIVE ||
- bus->fw_state == QTNF_FW_STATE_EP_DEAD)
+ if (qtnf_fw_is_attached(bus))
qtnf_core_detach(bus);
netif_napi_del(&bus->mux_napi);
u32 tx_done_count;
u32 tx_reclaim_done;
u32 tx_reclaim_req;
+ u32 tx_eapol;
u8 msi_enabled;
u8 tx_stopped;
int qtnf_pcie_control_tx(struct qtnf_bus *bus, struct sk_buff *skb);
int qtnf_pcie_alloc_skb_array(struct qtnf_pcie_bus_priv *priv);
-void qtnf_pcie_fw_boot_done(struct qtnf_bus *bus, bool boot_success);
+int qtnf_pcie_fw_boot_done(struct qtnf_bus *bus);
void qtnf_pcie_init_shm_ipc(struct qtnf_pcie_bus_priv *priv,
struct qtnf_shm_ipc_region __iomem *ipc_tx_reg,
struct qtnf_shm_ipc_region __iomem *ipc_rx_reg,
{
struct qtnf_bus *bus = container_of(work, struct qtnf_bus, fw_work);
struct qtnf_pcie_pearl_state *ps = (void *)get_bus_priv(bus);
+ u32 state = QTN_RC_FW_LOADRDY | QTN_RC_FW_QLINK;
+ const char *fwname = QTN_PCI_PEARL_FW_NAME;
struct pci_dev *pdev = ps->base.pdev;
const struct firmware *fw;
int ret;
- u32 state = QTN_RC_FW_LOADRDY | QTN_RC_FW_QLINK;
- const char *fwname = QTN_PCI_PEARL_FW_NAME;
- bool fw_boot_success = false;
if (ps->base.flashboot) {
state |= QTN_RC_FW_FLASHBOOT;
goto fw_load_exit;
}
- pr_info("firmware is up and running\n");
-
if (qtnf_poll_state(&ps->bda->bda_ep_state,
QTN_EP_FW_QLINK_DONE, QTN_FW_QLINK_TIMEOUT_MS)) {
pr_err("firmware runtime failure\n");
goto fw_load_exit;
}
- fw_boot_success = true;
+ pr_info("firmware is up and running\n");
-fw_load_exit:
- qtnf_pcie_fw_boot_done(bus, fw_boot_success);
+ ret = qtnf_pcie_fw_boot_done(bus);
+ if (ret)
+ goto fw_load_exit;
- if (fw_boot_success) {
- qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
- qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
- }
+ qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
+ qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
+
+fw_load_exit:
+ put_device(&pdev->dev);
}
static void qtnf_pearl_reclaim_tasklet_fn(unsigned long data)
int len;
int i;
+ if (unlikely(skb->protocol == htons(ETH_P_PAE))) {
+ qtnf_packet_send_hi_pri(skb);
+ qtnf_update_tx_stats(skb->dev, skb);
+ priv->tx_eapol++;
+ return NETDEV_TX_OK;
+ }
+
spin_lock_irqsave(&priv->tx_lock, flags);
if (!qtnf_tx_queue_ready(ts)) {
seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
+ seq_printf(s, "tx_eapol(%u)\n", priv->tx_eapol);
seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
seq_printf(s, "tx_done_index(%u)\n", tx_done_index);
{
struct qtnf_bus *bus = container_of(work, struct qtnf_bus, fw_work);
struct qtnf_pcie_topaz_state *ts = (void *)get_bus_priv(bus);
- int ret;
int bootloader_needed = readl(&ts->bda->bda_flags) & QTN_BDA_XMIT_UBOOT;
+ struct pci_dev *pdev = ts->base.pdev;
+ int ret;
qtnf_set_state(&ts->bda->bda_bootstate, QTN_BDA_FW_TARGET_BOOT);
}
}
+ ret = qtnf_post_init_ep(ts);
+ if (ret) {
+ pr_err("FW runtime failure\n");
+ goto fw_load_exit;
+ }
+
pr_info("firmware is up and running\n");
- ret = qtnf_post_init_ep(ts);
+ ret = qtnf_pcie_fw_boot_done(bus);
if (ret)
- pr_err("FW runtime failure\n");
+ goto fw_load_exit;
-fw_load_exit:
- qtnf_pcie_fw_boot_done(bus, ret ? false : true);
+ qtnf_debugfs_add_entry(bus, "pkt_stats", qtnf_dbg_pkt_stats);
+ qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
- if (ret == 0) {
- qtnf_debugfs_add_entry(bus, "pkt_stats", qtnf_dbg_pkt_stats);
- qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
- }
+fw_load_exit:
+ put_device(&pdev->dev);
}
static void qtnf_reclaim_tasklet_fn(unsigned long data)
#include <linux/ieee80211.h>
-#define QLINK_PROTO_VER 13
+#define QLINK_PROTO_VER 15
#define QLINK_MACID_RSVD 0xFF
#define QLINK_VIFID_RSVD 0xFF
* execution status (one of &enum qlink_cmd_result). Reply message
* may also contain data payload specific to the command type.
*
+ * @QLINK_CMD_SEND_FRAME: send specified frame over the air; firmware will
+ * encapsulate 802.3 packet into 802.11 frame automatically.
* @QLINK_CMD_BAND_INFO_GET: for the specified MAC and specified band, get
* the band's description including number of operational channels and
* info on each channel, HT/VHT capabilities, supported rates etc.
QLINK_CMD_FW_INIT = 0x0001,
QLINK_CMD_FW_DEINIT = 0x0002,
QLINK_CMD_REGISTER_MGMT = 0x0003,
- QLINK_CMD_SEND_MGMT_FRAME = 0x0004,
+ QLINK_CMD_SEND_FRAME = 0x0004,
QLINK_CMD_MGMT_SET_APPIE = 0x0005,
QLINK_CMD_PHY_PARAMS_GET = 0x0011,
QLINK_CMD_PHY_PARAMS_SET = 0x0012,
u8 do_register;
} __packed;
-enum qlink_mgmt_frame_tx_flags {
- QLINK_MGMT_FRAME_TX_FLAG_NONE = 0,
- QLINK_MGMT_FRAME_TX_FLAG_OFFCHAN = BIT(0),
- QLINK_MGMT_FRAME_TX_FLAG_NO_CCK = BIT(1),
- QLINK_MGMT_FRAME_TX_FLAG_ACK_NOWAIT = BIT(2),
+/**
+ * @QLINK_FRAME_TX_FLAG_8023: frame has an 802.3 header; if not set, the frame
+ *	is 802.11 encapsulated.
+ */
+enum qlink_frame_tx_flags {
+ QLINK_FRAME_TX_FLAG_OFFCHAN = BIT(0),
+ QLINK_FRAME_TX_FLAG_NO_CCK = BIT(1),
+ QLINK_FRAME_TX_FLAG_ACK_NOWAIT = BIT(2),
+ QLINK_FRAME_TX_FLAG_8023 = BIT(3),
};
/**
- * struct qlink_cmd_mgmt_frame_tx - data for QLINK_CMD_SEND_MGMT_FRAME command
+ * struct qlink_cmd_frame_tx - data for QLINK_CMD_SEND_FRAME command
*
* @cookie: opaque request identifier.
* @freq: Frequency to use for frame transmission.
- * @flags: Transmission flags, one of &enum qlink_mgmt_frame_tx_flags.
+ * @flags: Transmission flags, one of &enum qlink_frame_tx_flags.
* @frame_data: frame to transmit.
*/
-struct qlink_cmd_mgmt_frame_tx {
+struct qlink_cmd_frame_tx {
struct qlink_cmd chdr;
__le32 cookie;
__le16 freq;
* @initiator: which entity sent the request, one of &enum qlink_reg_initiator.
* @user_reg_hint_type: type of hint for QLINK_REGDOM_SET_BY_USER request, one
* of &enum qlink_user_reg_hint_type.
+ * @num_channels: number of &struct qlink_tlv_channel in a variable portion of a
+ * payload.
+ * @dfs_region: one of &enum qlink_dfs_regions.
+ * @info: variable portion of regulatory notifier callback.
*/
struct qlink_cmd_reg_notify {
struct qlink_cmd chdr;
u8 alpha2[2];
u8 initiator;
u8 user_reg_hint_type;
+ u8 num_channels;
+ u8 dfs_region;
+ u8 rsvd[2];
+ u8 info[0];
} __packed;
/**
u8 vifid;
} __packed;
+/**
+ * enum qlink_dfs_regions - regulatory DFS regions
+ *
+ * Corresponds to &enum nl80211_dfs_regions.
+ */
+enum qlink_dfs_regions {
+ QLINK_DFS_UNSET = 0,
+ QLINK_DFS_FCC = 1,
+ QLINK_DFS_ETSI = 2,
+ QLINK_DFS_JP = 3,
+};
+
/**
* struct qlink_resp_get_mac_info - response for QLINK_CMD_MAC_INFO command
*
* @bands_cap: wireless bands WMAC can operate in, bitmap of &enum qlink_band.
* @max_ap_assoc_sta: Maximum number of associations supported by WMAC.
* @radar_detect_widths: bitmask of channels BW for which WMAC can detect radar.
+ * @alpha2: country code ID firmware is configured to.
+ * @n_reg_rules: number of regulatory rules TLVs in variable portion of the
+ * message.
+ * @dfs_region: regulatory DFS region, one of &enum qlink_dfs_regions.
* @var_info: variable-length WMAC info data.
*/
struct qlink_resp_get_mac_info {
__le16 radar_detect_widths;
__le32 max_acl_mac_addrs;
u8 bands_cap;
+ u8 alpha2[2];
+ u8 n_reg_rules;
+ u8 dfs_region;
u8 rsvd[1];
u8 var_info[0];
} __packed;
-/**
- * enum qlink_dfs_regions - regulatory DFS regions
- *
- * Corresponds to &enum nl80211_dfs_regions.
- */
-enum qlink_dfs_regions {
- QLINK_DFS_UNSET = 0,
- QLINK_DFS_FCC = 1,
- QLINK_DFS_ETSI = 2,
- QLINK_DFS_JP = 3,
-};
-
/**
* struct qlink_resp_get_hw_info - response for QLINK_CMD_GET_HW_INFO command
*
* @mac_bitmap: Bitmap of MAC IDs that are active and can be used in firmware.
* @total_tx_chains: total number of transmit chains used by device.
* @total_rx_chains: total number of receive chains.
- * @alpha2: country code ID firmware is configured to.
- * @n_reg_rules: number of regulatory rules TLVs in variable portion of the
- * message.
- * @dfs_region: regulatory DFS region, one of @enum qlink_dfs_region.
- * @info: variable-length HW info, can contain QTN_TLV_ID_REG_RULE.
+ * @info: variable-length HW info.
*/
struct qlink_resp_get_hw_info {
struct qlink_resp rhdr;
u8 mac_bitmap;
u8 total_tx_chain;
u8 total_rx_chain;
- u8 alpha2[2];
- u8 n_reg_rules;
- u8 dfs_region;
u8 info[0];
} __packed;
* carried by QTN_TLV_ID_STA_STATS_MAP.
* @QTN_TLV_ID_MAX_SCAN_SSIDS: maximum number of SSIDs the device can scan
* for in any given scan.
+ * @QTN_TLV_ID_SCAN_DWELL_ACTIVE: time spent on a single channel for an active
+ * scan.
+ * @QTN_TLV_ID_SCAN_DWELL_PASSIVE: time spent on a single channel for a passive
+ * scan.
+ * @QTN_TLV_ID_SCAN_SAMPLE_DURATION: total duration of sampling a single channel
+ * during a scan including off-channel dwell time and operating channel
+ * time.
*/
enum qlink_tlv_id {
QTN_TLV_ID_FRAG_THRESH = 0x0201,
QTN_TLV_ID_WOWLAN_CAPAB = 0x0410,
QTN_TLV_ID_WOWLAN_PATTERN = 0x0411,
QTN_TLV_ID_SCAN_FLUSH = 0x0412,
- QTN_TLV_ID_SCAN_DWELL = 0x0413,
+ QTN_TLV_ID_SCAN_DWELL_ACTIVE = 0x0413,
+ QTN_TLV_ID_SCAN_DWELL_PASSIVE = 0x0416,
+ QTN_TLV_ID_SCAN_SAMPLE_DURATION = 0x0417,
};
struct qlink_tlv_hdr {
memcpy(qacl->mac_addrs, acl->mac_addrs,
acl->n_acl_entries * sizeof(*qacl->mac_addrs));
}
+
+/*
+ * Map an nl80211 band identifier to its qlink counterpart.
+ *
+ * Only the 2 GHz, 5 GHz and 60 GHz bands have a mapping; for any other
+ * value -EINVAL is returned through the enum return type.
+ */
+enum qlink_band qlink_utils_band_cfg2q(enum nl80211_band band)
+{
+	if (band == NL80211_BAND_2GHZ)
+		return QLINK_BAND_2GHZ;
+
+	if (band == NL80211_BAND_5GHZ)
+		return QLINK_BAND_5GHZ;
+
+	if (band == NL80211_BAND_60GHZ)
+		return QLINK_BAND_60GHZ;
+
+	return -EINVAL;
+}
+
+/*
+ * Map an nl80211 DFS state to its qlink counterpart. Every state other
+ * than USABLE and AVAILABLE (NL80211_DFS_UNAVAILABLE included) maps to
+ * QLINK_DFS_UNAVAILABLE.
+ */
+enum qlink_dfs_state qlink_utils_dfs_state_cfg2q(enum nl80211_dfs_state state)
+{
+	if (state == NL80211_DFS_USABLE)
+		return QLINK_DFS_USABLE;
+
+	if (state == NL80211_DFS_AVAILABLE)
+		return QLINK_DFS_AVAILABLE;
+
+	return QLINK_DFS_UNAVAILABLE;
+}
+
+/*
+ * Translate a mask of IEEE80211_CHAN_* channel flags into the matching
+ * QLINK_CHAN_* mask. Input bits that have no mapping below are ignored.
+ */
+u32 qlink_utils_chflags_cfg2q(u32 cfgflags)
+{
+	u32 qflags = 0;
+
+	qflags |= (cfgflags & IEEE80211_CHAN_DISABLED) ?
+		  QLINK_CHAN_DISABLED : 0;
+	qflags |= (cfgflags & IEEE80211_CHAN_NO_IR) ?
+		  QLINK_CHAN_NO_IR : 0;
+	qflags |= (cfgflags & IEEE80211_CHAN_RADAR) ?
+		  QLINK_CHAN_RADAR : 0;
+	qflags |= (cfgflags & IEEE80211_CHAN_NO_HT40PLUS) ?
+		  QLINK_CHAN_NO_HT40PLUS : 0;
+	qflags |= (cfgflags & IEEE80211_CHAN_NO_HT40MINUS) ?
+		  QLINK_CHAN_NO_HT40MINUS : 0;
+	qflags |= (cfgflags & IEEE80211_CHAN_NO_80MHZ) ?
+		  QLINK_CHAN_NO_80MHZ : 0;
+	qflags |= (cfgflags & IEEE80211_CHAN_NO_160MHZ) ?
+		  QLINK_CHAN_NO_160MHZ : 0;
+
+	return qflags;
+}
+
+/*
+ * Convert a mask of QLINK_RRF_* regulatory rule flags into the matching
+ * NL80211_RRF_* mask. Bits without an entry in the table are ignored.
+ */
+static u32 qtnf_reg_rule_flags_parse(u32 qflags)
+{
+	static const struct {
+		u32 qlink;
+		u32 nl80211;
+	} rrf_map[] = {
+		{ QLINK_RRF_NO_OFDM,		NL80211_RRF_NO_OFDM },
+		{ QLINK_RRF_NO_CCK,		NL80211_RRF_NO_CCK },
+		{ QLINK_RRF_NO_INDOOR,		NL80211_RRF_NO_INDOOR },
+		{ QLINK_RRF_NO_OUTDOOR,		NL80211_RRF_NO_OUTDOOR },
+		{ QLINK_RRF_DFS,		NL80211_RRF_DFS },
+		{ QLINK_RRF_PTP_ONLY,		NL80211_RRF_PTP_ONLY },
+		{ QLINK_RRF_PTMP_ONLY,		NL80211_RRF_PTMP_ONLY },
+		{ QLINK_RRF_NO_IR,		NL80211_RRF_NO_IR },
+		{ QLINK_RRF_AUTO_BW,		NL80211_RRF_AUTO_BW },
+		{ QLINK_RRF_IR_CONCURRENT,	NL80211_RRF_IR_CONCURRENT },
+		{ QLINK_RRF_NO_HT40MINUS,	NL80211_RRF_NO_HT40MINUS },
+		{ QLINK_RRF_NO_HT40PLUS,	NL80211_RRF_NO_HT40PLUS },
+		{ QLINK_RRF_NO_80MHZ,		NL80211_RRF_NO_80MHZ },
+		{ QLINK_RRF_NO_160MHZ,		NL80211_RRF_NO_160MHZ },
+	};
+	u32 result = 0;
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(rrf_map) / sizeof(rrf_map[0]); idx++) {
+		if (qflags & rrf_map[idx].qlink)
+			result |= rrf_map[idx].nl80211;
+	}
+
+	return result;
+}
+
+/*
+ * Fill @rule from a qlink regulatory rule TLV.
+ *
+ * Every numeric TLV field is read as little-endian 32-bit and stored in
+ * the corresponding @rule member; the TLV flag word is additionally run
+ * through qtnf_reg_rule_flags_parse().
+ */
+void qlink_utils_regrule_q2nl(struct ieee80211_reg_rule *rule,
+			      const struct qlink_tlv_reg_rule *tlv)
+{
+	struct ieee80211_freq_range *range = &rule->freq_range;
+	struct ieee80211_power_rule *power = &rule->power_rule;
+	u32 tlv_flags;
+
+	range->start_freq_khz = le32_to_cpu(tlv->start_freq_khz);
+	range->end_freq_khz = le32_to_cpu(tlv->end_freq_khz);
+	range->max_bandwidth_khz = le32_to_cpu(tlv->max_bandwidth_khz);
+
+	power->max_antenna_gain = le32_to_cpu(tlv->max_antenna_gain);
+	power->max_eirp = le32_to_cpu(tlv->max_eirp);
+
+	rule->dfs_cac_ms = le32_to_cpu(tlv->dfs_cac_ms);
+
+	tlv_flags = le32_to_cpu(tlv->flags);
+	rule->flags = qtnf_reg_rule_flags_parse(tlv_flags);
+}
unsigned int arr_max_len);
void qlink_acl_data_cfg2q(const struct cfg80211_acl_data *acl,
struct qlink_acl_data *qacl);
+enum qlink_band qlink_utils_band_cfg2q(enum nl80211_band band);
+enum qlink_dfs_state qlink_utils_dfs_state_cfg2q(enum nl80211_dfs_state state);
+u32 qlink_utils_chflags_cfg2q(u32 cfgflags);
+void qlink_utils_regrule_q2nl(struct ieee80211_reg_rule *rule,
+ const struct qlink_tlv_reg_rule *tlv_rule);
#endif /* _QTN_FMAC_QLINK_UTIL_H_ */
CONFIG_CHANNEL_HT40,
CONFIG_POWERSAVING,
CONFIG_HT_DISABLED,
- CONFIG_QOS_DISABLED,
CONFIG_MONITORING,
/*
rt2x00dev->intf_associated--;
rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated);
-
- clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags);
}
- /*
- * Check for access point which do not support 802.11e . We have to
- * generate data frames sequence number in S/W for such AP, because
- * of H/W bug.
- */
- if (changes & BSS_CHANGED_QOS && !bss_conf->qos)
- set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags);
-
/*
* When the erp information has changed, we should perform
* additional configuration steps. For all other changes we are done.
if (!rt2x00_has_cap_flag(rt2x00dev, REQUIRE_SW_SEQNO)) {
/*
* rt2800 has a H/W (or F/W) bug, device incorrectly increase
- * seqno on retransmited data (non-QOS) frames. To workaround
- * the problem let's generate seqno in software if QOS is
- * disabled.
+ * seqno on retransmitted data (non-QOS) and management frames.
+ * To workaround the problem let's generate seqno in software.
+ * Except for beacons which are transmitted periodically by H/W
+ * hence hardware has to assign seqno for them.
*/
- if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags))
- __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
- else
+ if (ieee80211_is_beacon(hdr->frame_control)) {
+ __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
/* H/W will generate sequence number */
return;
+ }
+
+ __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
}
/*
/* <2> work queue */
rtlpriv->works.hw = hw;
rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
+ if (unlikely(!rtlpriv->works.rtl_wq)) {
+ pr_err("Failed to allocate work queue\n");
+ return;
+ }
+
INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq,
(void *)rtl_watchdog_wq_callback);
INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq,
memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc));
- spin_lock_bh(&rtlpriv->locks.waitq_lock);
+ spin_lock(&rtlpriv->locks.waitq_lock);
if (!skb_queue_empty(&mac->skb_waitq[tid]) &&
(ring->entries - skb_queue_len(&ring->queue) >
rtlhal->max_earlymode_num)) {
skb = skb_dequeue(&mac->skb_waitq[tid]);
} else {
- spin_unlock_bh(&rtlpriv->locks.waitq_lock);
+ spin_unlock(&rtlpriv->locks.waitq_lock);
break;
}
- spin_unlock_bh(&rtlpriv->locks.waitq_lock);
+ spin_unlock(&rtlpriv->locks.waitq_lock);
/* Some macaddr can't do early mode. like
* multicast/broadcast/no_qos data
u1rsvdpageloc, 3);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet, totalpacketlen);
rtstatus = rtl_cmd_send_packet(hw, skb);
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rx_fwinfo_88e *p_drvinfo;
struct ieee80211_hdr *hdr;
-
+ u8 wake_match;
u32 phystatus = GET_RX_DESC_PHYST(pdesc);
+
status->packet_report_type = (u8)GET_RX_STATUS_DESC_RPT_SEL(pdesc);
if (status->packet_report_type == TX_REPORT2)
status->length = (u16)GET_RX_RPT2_DESC_PKT_LEN(pdesc);
status->is_cck = RTL8188_RX_HAL_IS_CCK_RATE(status->rate);
status->macid = GET_RX_DESC_MACID(pdesc);
- if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(2);
+ if (GET_RX_STATUS_DESC_PATTERN_MATCH(pdesc))
+ wake_match = BIT(2);
else if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(1);
+ wake_match = BIT(1);
else if (GET_RX_STATUS_DESC_UNICAST_MATCH(pdesc))
- status->wake_match = BIT(0);
+ wake_match = BIT(0);
else
- status->wake_match = 0;
- if (status->wake_match)
+ wake_match = 0;
+ if (wake_match)
RT_TRACE(rtlpriv, COMP_RXDESC, DBG_LOUD,
"GGGGGGGGGGGGGet Wakeup Packet!! WakeMatch=%d\n",
- status->wake_match);
+ wake_match);
rx_status->freq = hw->conf.chandef.chan->center_freq;
rx_status->band = hw->conf.chandef.chan->band;
u1rsvdpageloc, 3);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet, totalpacketlen);
if (cmd_send_packet)
u1rsvdpageloc, 3);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet, totalpacketlen);
rtstatus = rtl_cmd_send_packet(hw, skb);
struct rx_fwinfo *p_drvinfo;
struct ieee80211_hdr *hdr;
u32 phystatus = GET_RX_DESC_PHYST(pdesc);
+ u8 wake_match;
if (GET_RX_STATUS_DESC_RPT_SEL(pdesc) == 0)
status->packet_report_type = NORMAL_RX;
status->is_cck = RTL92EE_RX_HAL_IS_CCK_RATE(status->rate);
status->macid = GET_RX_DESC_MACID(pdesc);
- if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(2);
+ if (GET_RX_STATUS_DESC_PATTERN_MATCH(pdesc))
+ wake_match = BIT(2);
else if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(1);
+ wake_match = BIT(1);
else if (GET_RX_STATUS_DESC_UNICAST_MATCH(pdesc))
- status->wake_match = BIT(0);
+ wake_match = BIT(0);
else
- status->wake_match = 0;
- if (status->wake_match)
+ wake_match = 0;
+ if (wake_match)
RT_TRACE(rtlpriv, COMP_RXDESC, DBG_LOUD,
"GGGGGGGGGGGGGet Wakeup Packet!! WakeMatch=%d\n",
- status->wake_match);
+ wake_match);
rx_status->freq = hw->conf.chandef.chan->center_freq;
rx_status->band = hw->conf.chandef.chan->band;
}
-void rtl8723e_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw)
+static void rtl8723e_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
u1rsvdpageloc, 3);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet, totalpacketlen);
rtstatus = rtl_cmd_send_packet(hw, skb);
u1rsvdpageloc, sizeof(u1rsvdpageloc));
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet, totalpacketlen);
rtstatus = rtl_cmd_send_packet(hw, skb);
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rx_fwinfo_8723be *p_drvinfo;
struct ieee80211_hdr *hdr;
-
+ u8 wake_match;
u32 phystatus = GET_RX_DESC_PHYST(pdesc);
status->length = (u16)GET_RX_DESC_PKT_LEN(pdesc);
status->packet_report_type = NORMAL_RX;
- if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(2);
+ if (GET_RX_STATUS_DESC_PATTERN_MATCH(pdesc))
+ wake_match = BIT(2);
else if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(1);
+ wake_match = BIT(1);
else if (GET_RX_STATUS_DESC_UNICAST_MATCH(pdesc))
- status->wake_match = BIT(0);
+ wake_match = BIT(0);
else
- status->wake_match = 0;
- if (status->wake_match)
+ wake_match = 0;
+ if (wake_match)
RT_TRACE(rtlpriv, COMP_RXDESC, DBG_LOUD,
"GGGGGGGGGGGGGet Wakeup Packet!! WakeMatch=%d\n",
- status->wake_match);
+ wake_match);
rx_status->freq = hw->conf.chandef.chan->center_freq;
rx_status->band = hw->conf.chandef.chan->band;
&reserved_page_packet_8812[0], totalpacketlen);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet_8812, totalpacketlen);
rtstatus = rtl_cmd_send_packet(hw, skb);
&reserved_page_packet_8821[0], totalpacketlen);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
skb_put_data(skb, &reserved_page_packet_8821, totalpacketlen);
rtstatus = rtl_cmd_send_packet(hw, skb);
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rx_fwinfo_8821ae *p_drvinfo;
struct ieee80211_hdr *hdr;
-
+ u8 wake_match;
u32 phystatus = GET_RX_DESC_PHYST(pdesc);
status->length = (u16)GET_RX_DESC_PKT_LEN(pdesc);
status->packet_report_type = NORMAL_RX;
if (GET_RX_STATUS_DESC_PATTERN_MATCH(pdesc))
- status->wake_match = BIT(2);
+ wake_match = BIT(2);
else if (GET_RX_STATUS_DESC_MAGIC_MATCH(pdesc))
- status->wake_match = BIT(1);
+ wake_match = BIT(1);
else if (GET_RX_STATUS_DESC_UNICAST_MATCH(pdesc))
- status->wake_match = BIT(0);
+ wake_match = BIT(0);
else
- status->wake_match = 0;
+ wake_match = 0;
- if (status->wake_match)
+ if (wake_match)
RT_TRACE(rtlpriv, COMP_RXDESC, DBG_LOUD,
"GGGGGGGGGGGGGet Wakeup Packet!! WakeMatch=%d\n",
- status->wake_match);
+ wake_match);
rx_status->freq = hw->conf.chandef.chan->center_freq;
rx_status->band = hw->conf.chandef.chan->band;
u8 packet_report_type;
u32 macid;
- u8 wake_match;
u32 bt_rx_rssi_percentage;
u32 macid_valid_entry[2];
};
struct xenvif_hash_cache cache;
};
+struct backend_info {
+ struct xenbus_device *dev;
+ struct xenvif *vif;
+
+ /* This is the state that will be reflected in xenstore when any
+ * active hotplug script completes.
+ */
+ enum xenbus_state state;
+
+ enum xenbus_state frontend_state;
+ struct xenbus_watch hotplug_status_watch;
+ u8 have_hotplug_status_watch:1;
+
+ const char *hotplug_script;
+};
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
struct xenbus_watch credit_watch;
struct xenbus_watch mcast_ctrl_watch;
+ struct backend_info *be;
+
spinlock_t lock;
#ifdef CONFIG_DEBUG_FS
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
-struct backend_info {
- struct xenbus_device *dev;
- struct xenvif *vif;
-
- /* This is the state that will be reflected in xenstore when any
- * active hotplug script completes.
- */
- enum xenbus_state state;
-
- enum xenbus_state frontend_state;
- struct xenbus_watch hotplug_status_watch;
- u8 have_hotplug_status_watch:1;
-
- const char *hotplug_script;
-};
-
static int connect_data_rings(struct backend_info *be,
struct xenvif_queue *queue);
static void connect(struct backend_info *be);
return err;
}
be->vif = vif;
+ vif->be = be;
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
return 0;
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's CLOSING state -- fallthrough */
+ /* Fall through - Missed the backend's CLOSING state. */
case XenbusStateClosing:
xenbus_frontend_closed(dev);
break;
int result = 0;
int res_len;
static bool wtx;
- struct device *dev;
struct device *spidevice;
- struct nfc_digital_dev *nfcddev;
struct sk_buff *skb_resp;
struct st95hf_context *stcontext =
(struct st95hf_context *)st95hfcontext;
goto end;
}
- dev = &stcontext->nfcdev->dev;
- nfcddev = stcontext->ddev;
if (skb_resp->data[2] == WTX_REQ_FROM_TAG) {
/* Request for new FWT from tag */
result = st95hf_handle_wtx(stcontext, true, skb_resp->data[3]);
};
MODULE_DEVICE_TABLE(spi, st95hf_id);
+static const struct of_device_id st95hf_spi_of_match[] = {
+ { .compatible = "st,st95hf" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, st95hf_spi_of_match);
+
static int st95hf_probe(struct spi_device *nfc_spi_dev)
{
int ret;
.driver = {
.name = "st95hf",
.owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(st95hf_spi_of_match),
},
.id_table = st95hf_id,
.probe = st95hf_probe,
return NULL;
nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
- if (nd_btt->id < 0) {
- kfree(nd_btt);
- return NULL;
- }
+ if (nd_btt->id < 0)
+ goto out_nd_btt;
nd_btt->lbasize = lbasize;
- if (uuid)
+ if (uuid) {
uuid = kmemdup(uuid, 16, GFP_KERNEL);
+ if (!uuid)
+ goto out_put_id;
+ }
nd_btt->uuid = uuid;
dev = &nd_btt->dev;
dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
return NULL;
}
return dev;
+
+out_put_id:
+ ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
+
+out_nd_btt:
+ kfree(nd_btt);
+ return NULL;
}
struct device *nd_btt_create(struct nd_region *nd_region)
if (!nsblk->uuid)
goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
- if (name[0])
+ if (name[0]) {
nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
GFP_KERNEL);
+ if (!nsblk->alt_name)
+ goto blk_err;
+ }
res = nsblk_add_resource(nd_region, ndd, nsblk,
__le64_to_cpu(nd_label->dpa));
if (!res)
while (len) {
mem = kmap_atomic(page);
- chunk = min_t(unsigned int, len, PAGE_SIZE);
+ chunk = min_t(unsigned int, len, PAGE_SIZE - off);
memcpy_flushcache(pmem_addr, mem + off, chunk);
kunmap_atomic(mem);
len -= chunk;
off = 0;
page++;
- pmem_addr += PAGE_SIZE;
+ pmem_addr += chunk;
}
}
while (len) {
mem = kmap_atomic(page);
- chunk = min_t(unsigned int, len, PAGE_SIZE);
+ chunk = min_t(unsigned int, len, PAGE_SIZE - off);
rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
len -= chunk;
off = 0;
page++;
- pmem_addr += PAGE_SIZE;
+ pmem_addr += chunk;
}
return BLK_STS_OK;
}
module_param(key_revalidate, bool, 0444);
MODULE_PARM_DESC(key_revalidate, "Require key validation at init.");
+static const char zero_key[NVDIMM_PASSPHRASE_LEN];
+
static void *key_data(struct key *key)
{
struct encrypted_key_payload *epayload = dereference_key_locked(key);
return key;
}
+/*
+ * Request the key for @nvdimm and return a pointer to its payload.
+ *
+ * The result of nvdimm_request_key() is stored in *@key. When no key is
+ * found, *@key is NULL and the all-zero passphrase buffer zero_key is
+ * returned instead of a key payload.
+ */
+static const void *nvdimm_get_key_payload(struct nvdimm *nvdimm,
+		struct key **key)
+{
+	struct key *found = nvdimm_request_key(nvdimm);
+
+	*key = found;
+
+	return found ? key_data(found) : zero_key;
+}
+
static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
key_serial_t id, int subclass)
{
return key;
}
-static struct key *nvdimm_key_revalidate(struct nvdimm *nvdimm)
+/*
+ * Look up the user key @id and return a pointer to its payload.
+ *
+ * With @id == 0 no lookup is done and *@key is NULL: zero_key is returned
+ * for the NVDIMM_BASE_KEY subclass, NULL for any other subclass.
+ * Otherwise *@key receives the result of nvdimm_lookup_user_key(), and
+ * NULL is returned when that lookup finds nothing.
+ */
+static const void *nvdimm_get_user_key_payload(struct nvdimm *nvdimm,
+		key_serial_t id, int subclass, struct key **key)
+{
+	struct key *found;
+
+	if (id == 0) {
+		*key = NULL;
+		return subclass == NVDIMM_BASE_KEY ? zero_key : NULL;
+	}
+
+	found = nvdimm_lookup_user_key(nvdimm, id, subclass);
+	*key = found;
+	if (!found)
+		return NULL;
+
+	return key_data(found);
+}
+
+
+static int nvdimm_key_revalidate(struct nvdimm *nvdimm)
{
struct key *key;
int rc;
+ const void *data;
if (!nvdimm->sec.ops->change_key)
- return NULL;
+ return -EOPNOTSUPP;
- key = nvdimm_request_key(nvdimm);
- if (!key)
- return NULL;
+ data = nvdimm_get_key_payload(nvdimm, &key);
/*
* Send the same key to the hardware as new and old key to
* verify that the key is good.
*/
- rc = nvdimm->sec.ops->change_key(nvdimm, key_data(key),
- key_data(key), NVDIMM_USER);
+ rc = nvdimm->sec.ops->change_key(nvdimm, data, data, NVDIMM_USER);
if (rc < 0) {
nvdimm_put_key(key);
- key = NULL;
+ return rc;
}
- return key;
+
+ nvdimm_put_key(key);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ return 0;
}
static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- struct key *key = NULL;
+ struct key *key;
+ const void *data;
int rc;
/* The bus lock should be held at the top level of the call stack */
if (!key_revalidate)
return 0;
- key = nvdimm_key_revalidate(nvdimm);
- if (!key)
- return nvdimm_security_freeze(nvdimm);
+ return nvdimm_key_revalidate(nvdimm);
} else
- key = nvdimm_request_key(nvdimm);
+ data = nvdimm_get_key_payload(nvdimm, &key);
- if (!key)
- return -ENOKEY;
-
- rc = nvdimm->sec.ops->unlock(nvdimm, key_data(key));
+ rc = nvdimm->sec.ops->unlock(nvdimm, data);
dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct key *key;
int rc;
+ const void *data;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
return -EBUSY;
}
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
return -ENOKEY;
- rc = nvdimm->sec.ops->disable(nvdimm, key_data(key));
+ rc = nvdimm->sec.ops->disable(nvdimm, data);
dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct key *key, *newkey;
int rc;
+ const void *data, *newdata;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
return -EIO;
}
- if (keyid == 0)
- key = NULL;
- else {
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
- return -ENOKEY;
- }
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
- newkey = nvdimm_lookup_user_key(nvdimm, new_keyid, NVDIMM_NEW_KEY);
- if (!newkey) {
+ newdata = nvdimm_get_user_key_payload(nvdimm, new_keyid,
+ NVDIMM_NEW_KEY, &newkey);
+ if (!newdata) {
nvdimm_put_key(key);
return -ENOKEY;
}
- rc = nvdimm->sec.ops->change_key(nvdimm, key ? key_data(key) : NULL,
- key_data(newkey), pass_type);
+ rc = nvdimm->sec.ops->change_key(nvdimm, data, newdata, pass_type);
dev_dbg(dev, "key: %d %d update%s: %s\n",
key_serial(key), key_serial(newkey),
pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- struct key *key;
+ struct key *key = NULL;
int rc;
+ const void *data;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
return -EOPNOTSUPP;
}
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
return -ENOKEY;
- rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type);
+ rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type);
dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key),
pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
rc == 0 ? "success" : "fail");
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- struct key *key;
+ struct key *key = NULL;
int rc;
+ const void *data;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
return -EBUSY;
}
- if (keyid == 0)
- key = NULL;
- else {
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
- return -ENOKEY;
- }
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
- rc = nvdimm->sec.ops->overwrite(nvdimm, key ? key_data(key) : NULL);
+ rc = nvdimm->sec.ops->overwrite(nvdimm, data);
dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
"Cancelling I/O %d", req->tag);
nvme_req(req)->status = NVME_SC_ABORT_REQ;
- blk_mq_complete_request(req);
+ blk_mq_complete_request_sync(req);
return true;
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
memset(queue, 0, sizeof(*queue));
queue->ctrl = ctrl;
queue->qnum = idx;
- atomic_set(&queue->csn, 1);
+ atomic_set(&queue->csn, 0);
queue->dev = ctrl->dev;
if (idx > 0)
*/
queue->connection_id = 0;
- atomic_set(&queue->csn, 1);
+ atomic_set(&queue->csn, 0);
}
static void
{
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
struct nvme_command *sqe = &cmdiu->sqe;
- u32 csn;
int ret, opstate;
/*
/* format the FC-NVME CMD IU and fcp_req */
cmdiu->connection_id = cpu_to_be64(queue->connection_id);
- csn = atomic_inc_return(&queue->csn);
- cmdiu->csn = cpu_to_be32(csn);
cmdiu->data_len = cpu_to_be32(data_len);
switch (io_dir) {
case NVMEFC_FCP_WRITE:
if (!(op->flags & FCOP_FLAGS_AEN))
blk_mq_start_request(op->rq);
+ cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
&ctrl->rport->remoteport,
queue->lldd_handle, &op->fcp_req);
if (ret) {
+ /*
+ * If the lld fails to send the command is there an issue with
+ * the csn value? If the command that fails is the Connect,
+ * no - as the connection won't be live. If it is a command
+ * post-connect, it's possible a gap in csn may be created.
+ * Does this matter? As Linux initiators don't send fused
+ * commands, no. The gap would exist, but as there's nothing
+ * that depends on csn order to be delivered on the target
+ * side, it shouldn't hurt. It would be difficult for a
+ * target to even detect the csn gap as it has no idea when the
+ * cmd with the csn was supposed to arrive.
+ */
opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
return len;
}
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
+{
+ return le64_to_cpu(cmd->get_log_page.lpo);
+}
+
static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
{
nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
}
+static size_t discovery_log_entries(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_subsys_link *p;
+ struct nvmet_port *r;
+ size_t entries = 0;
+
+ list_for_each_entry(p, &req->port->subsystems, entry) {
+ if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+ continue;
+ entries++;
+ }
+ list_for_each_entry(r, &req->port->referrals, entry)
+ entries++;
+ return entries;
+}
+
static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
{
const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmf_disc_rsp_page_hdr *hdr;
+ u64 offset = nvmet_get_log_page_offset(req->cmd);
size_t data_len = nvmet_get_log_page_len(req->cmd);
- size_t alloc_len = max(data_len, sizeof(*hdr));
- int residual_len = data_len - sizeof(*hdr);
+ size_t alloc_len;
struct nvmet_subsys_link *p;
struct nvmet_port *r;
u32 numrec = 0;
u16 status = 0;
+ void *buffer;
+
+ /* Spec requires dword aligned offsets */
+ if (offset & 0x3) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ goto out;
+ }
/*
* Make sure we're passing at least a buffer of response header size.
* If host provided data len is less than the header size, only the
* number of bytes requested by host will be sent to host.
*/
- hdr = kzalloc(alloc_len, GFP_KERNEL);
- if (!hdr) {
+ down_read(&nvmet_config_sem);
+ alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req);
+ buffer = kzalloc(alloc_len, GFP_KERNEL);
+ if (!buffer) {
+ up_read(&nvmet_config_sem);
status = NVME_SC_INTERNAL;
goto out;
}
- down_read(&nvmet_config_sem);
+ hdr = buffer;
list_for_each_entry(p, &req->port->subsystems, entry) {
+ char traddr[NVMF_TRADDR_SIZE];
+
if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
continue;
- if (residual_len >= entry_size) {
- char traddr[NVMF_TRADDR_SIZE];
-
- nvmet_set_disc_traddr(req, req->port, traddr);
- nvmet_format_discovery_entry(hdr, req->port,
- p->subsys->subsysnqn, traddr,
- NVME_NQN_NVME, numrec);
- residual_len -= entry_size;
- }
+
+ nvmet_set_disc_traddr(req, req->port, traddr);
+ nvmet_format_discovery_entry(hdr, req->port,
+ p->subsys->subsysnqn, traddr,
+ NVME_NQN_NVME, numrec);
numrec++;
}
list_for_each_entry(r, &req->port->referrals, entry) {
- if (residual_len >= entry_size) {
- nvmet_format_discovery_entry(hdr, r,
- NVME_DISC_SUBSYS_NAME,
- r->disc_addr.traddr,
- NVME_NQN_DISC, numrec);
- residual_len -= entry_size;
- }
+ nvmet_format_discovery_entry(hdr, r,
+ NVME_DISC_SUBSYS_NAME,
+ r->disc_addr.traddr,
+ NVME_NQN_DISC, numrec);
numrec++;
}
up_read(&nvmet_config_sem);
- status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
- kfree(hdr);
+ status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len);
+ kfree(buffer);
out:
nvmet_req_complete(req, status);
}
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
extern struct list_head *nvmet_ports;
void nvmet_port_disc_changed(struct nvmet_port *port,
*/
#include <linux/etherdevice.h>
#include <linux/kernel.h>
-#include <linux/nvmem-consumer.h>
#include <linux/of_net.h>
#include <linux/phy.h>
#include <linux/export.h>
#define DBG_IRT(x...)
#endif
+#ifdef CONFIG_64BIT
+#define COMPARE_IRTE_ADDR(irte, hpa) ((irte)->dest_iosapic_addr == (hpa))
+#else
#define COMPARE_IRTE_ADDR(irte, hpa) \
- ((irte)->dest_iosapic_addr == F_EXTEND(hpa))
+ ((irte)->dest_iosapic_addr == ((hpa) | 0xffffffff00000000ULL))
+#endif
#define IOSAPIC_REG_SELECT 0x00
#define IOSAPIC_REG_WINDOW 0x10
* removed from the slot/adapter.
*/
msleep(1000);
+
+ /* Ignore link or presence changes caused by power off */
+ atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
+ &ctrl->pending_events);
}
/* turn off Green LED */
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130,
quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170,
+ quirk_dma_func1_alias);
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172,
quirk_dma_func1_alias);
#include <linux/debugfs.h>
#include <linux/device.h>
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/platform_data/x86/clk-pmc-atom.h>
}
#endif /* CONFIG_DEBUG_FS */
+/*
+ * Some systems need one or more of their pmc_plt_clks to be
+ * marked as critical.
+ */
+static const struct dmi_system_id critclk_systems[] = {
+ {
+ .ident = "MPL CEC1x",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"),
+ },
+ },
+ { /*sentinel*/ }
+};
+
static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap,
const struct pmc_data *pmc_data)
{
struct platform_device *clkdev;
struct pmc_clk_data *clk_data;
+ const struct dmi_system_id *d = dmi_first_match(critclk_systems);
clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
if (!clk_data)
clk_data->base = pmc_regmap; /* offset is added by client */
clk_data->clks = pmc_data->clks;
+ if (d) {
+ clk_data->critical = true;
+ pr_info("%s critclks quirk enabled\n", d->ident);
+ }
clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom",
PLATFORM_DEVID_NONE,
-// SPDX-License-Identifier: GPL
+// SPDX-License-Identifier: GPL-2.0
/*
* Power supply driver for the goldfish emulator
*
arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits);
arb->rstc.ops = &meson_audio_arb_rstc_ops;
arb->rstc.of_node = dev->of_node;
+ arb->rstc.owner = THIS_MODULE;
/*
* Enable general :
will be called rtc-s5m.
config RTC_DRV_SD3078
- tristate "ZXW Crystal SD3078"
+ tristate "ZXW Shenzhen whwave SD3078"
help
- If you say yes here you get support for the ZXW Crystal
+ If you say yes here you get support for the ZXW Shenzhen whwave
SD3078 RTC chips.
This driver can also be built as a module. If so, the module
struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);
if (device_may_wakeup(dev))
- enable_irq_wake(cros_ec_rtc->cros_ec->irq);
+ return enable_irq_wake(cros_ec_rtc->cros_ec->irq);
return 0;
}
struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);
if (device_may_wakeup(dev))
- disable_irq_wake(cros_ec_rtc->cros_ec->irq);
+ return disable_irq_wake(cros_ec_rtc->cros_ec->irq);
return 0;
}
da9063_data_to_tm(data, &rtc->alarm_time, rtc);
rtc->rtc_sync = false;
+ /*
+ * TODO: some models have alarms on a minute boundary but still support
+ * real hardware interrupts. Add this once the core supports it.
+ */
+ if (config->rtc_data_start != RTC_SEC)
+ rtc->rtc_dev->uie_unsupported = 1;
+
irq_alarm = platform_get_irq_byname(pdev, "ALARM");
ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL,
da9063_alarm_event,
static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
{
unsigned int byte;
- int value = 0xff; /* return 0xff for ignored values */
+ int value = -1; /* return -1 for ignored values */
byte = readb(rtc->regbase + reg_off);
if (byte & AR_ENB) {
blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block);
raw:
- block->blocks = (private->real_cyl *
+ block->blocks = ((unsigned long) private->real_cyl *
private->rdc_data.trk_per_cyl *
blk_per_trk);
dev_info(&device->cdev->dev,
- "DASD with %d KB/block, %d KB total size, %d KB/track, "
+ "DASD with %u KB/block, %lu KB total size, %u KB/track, "
"%s\n", (block->bp_block >> 10),
- ((private->real_cyl *
+ (((unsigned long) private->real_cyl *
private->rdc_data.trk_per_cyl *
blk_per_trk * (block->bp_block >> 9)) >> 1),
((blk_per_trk * block->bp_block) >> 10),
(void (*)(unsigned long)) con3270_read_tasklet,
(unsigned long) condev->read);
- raw3270_add_view(&condev->view, &con3270_fn, 1);
+ raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
INIT_LIST_HEAD(&condev->freemem);
for (i = 0; i < CON3270_STRING_PAGES; i++) {
init_waitqueue_head(&fp->wait);
fp->fs_pid = get_pid(task_pid(current));
- rc = raw3270_add_view(&fp->view, &fs3270_fn, minor);
+ rc = raw3270_add_view(&fp->view, &fs3270_fn, minor,
+ RAW3270_VIEW_LOCK_BH);
if (rc) {
fs3270_free_view(&fp->view);
goto out;
* Add view to device with minor "minor".
*/
int
-raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor)
+raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass)
{
unsigned long flags;
struct raw3270 *rp;
view->cols = rp->cols;
view->ascebc = rp->ascebc;
spin_lock_init(&view->lock);
+ lockdep_set_subclass(&view->lock, subclass);
list_add(&view->list, &rp->view_list);
rc = 0;
spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags);
struct raw3270_view {
struct list_head list;
spinlock_t lock;
+#define RAW3270_VIEW_LOCK_IRQ 0
+#define RAW3270_VIEW_LOCK_BH 1
atomic_t ref_count;
struct raw3270 *dev;
struct raw3270_fn *fn;
unsigned char *ascebc; /* ascii -> ebcdic table */
};
-int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int);
+int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int);
int raw3270_activate_view(struct raw3270_view *);
void raw3270_del_view(struct raw3270_view *);
void raw3270_deactivate_view(struct raw3270_view *);
return PTR_ERR(tp);
rc = raw3270_add_view(&tp->view, &tty3270_fn,
- tty->index + RAW3270_FIRSTMINOR);
+ tty->index + RAW3270_FIRSTMINOR,
+ RAW3270_VIEW_LOCK_BH);
if (rc) {
tty3270_free_view(tp);
return rc;
__ap_flush_queue(aq);
/* set REMOVE state to prevent new messages are queued in */
aq->state = AP_STATE_REMOVE;
- del_timer_sync(&aq->timeout);
spin_unlock_bh(&aq->lock);
+ del_timer_sync(&aq->timeout);
}
void ap_queue_remove(struct ap_queue *aq)
static void __init pkey_debug_init(void)
{
- debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long));
+ /* 5 arguments per dbf entry (including the format string ptr) */
+ debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long));
debug_register_view(debug_info, &debug_sprintf_view);
debug_set_level(debug_info, 3);
}
if (priv->channel[direction] == NULL) {
if (direction == CTCM_WRITE)
channel_free(priv->channel[CTCM_READ]);
+ result = -ENODEV;
goto out_dev;
}
priv->channel[direction]->netdev = dev;
bool rx_bcast_enabled;
};
+static inline int qeth_is_adp_supported(struct qeth_ipa_info *ipa,
+ enum qeth_ipa_setadp_cmd func)
+{
+ return (ipa->supported_funcs & func);
+}
+
static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa,
enum qeth_ipa_funcs func)
{
}
#define qeth_adp_supported(c, f) \
- qeth_is_ipa_supported(&c->options.adp, f)
-#define qeth_adp_enabled(c, f) \
- qeth_is_ipa_enabled(&c->options.adp, f)
+ qeth_is_adp_supported(&c->options.adp, f)
#define qeth_is_supported(c, f) \
qeth_is_ipa_supported(&c->options.ipa4, f)
#define qeth_is_enabled(c, f) \
/* QDIO queue and buffer handling */
/*****************************************************************************/
#define QETH_MAX_QUEUES 4
+#define QETH_IQD_MIN_TXQ 2 /* One for ucast, one for mcast. */
+#define QETH_IQD_MCAST_TXQ 0
+#define QETH_IQD_MIN_UCAST_TXQ 1
#define QETH_IN_BUF_SIZE_DEFAULT 65536
#define QETH_IN_BUF_COUNT_DEFAULT 64
#define QETH_IN_BUF_COUNT_HSDEFAULT 128
#define QETH_HDR_EXT_CSUM_TRANSP_REQ 0x20
#define QETH_HDR_EXT_UDP 0x40 /*bit off for TCP*/
-enum qeth_qdio_buffer_states {
- /*
- * inbound: read out by driver; owned by hardware in order to be filled
- * outbound: owned by driver in order to be filled
- */
- QETH_QDIO_BUF_EMPTY,
- /*
- * inbound: filled by hardware; owned by driver in order to be read out
- * outbound: filled by driver; owned by hardware in order to be sent
- */
- QETH_QDIO_BUF_PRIMED,
- /*
- * inbound: not applicable
- * outbound: identified to be pending in TPQ
- */
- QETH_QDIO_BUF_PENDING,
- /*
- * inbound: not applicable
- * outbound: found in completion queue
- */
- QETH_QDIO_BUF_IN_CQ,
- /*
- * inbound: not applicable
- * outbound: handled via transfer pending / completion queue
- */
- QETH_QDIO_BUF_HANDLED_DELAYED,
-};
-
enum qeth_qdio_info_states {
QETH_QDIO_UNINITIALIZED,
QETH_QDIO_ALLOCATED,
int next_buf_to_init;
};
+enum qeth_qdio_out_buffer_state {
+ /* Owned by driver, in order to be filled. */
+ QETH_QDIO_BUF_EMPTY,
+ /* Filled by driver; owned by hardware in order to be sent. */
+ QETH_QDIO_BUF_PRIMED,
+ /* Identified to be pending in TPQ. */
+ QETH_QDIO_BUF_PENDING,
+ /* Found in completion queue. */
+ QETH_QDIO_BUF_IN_CQ,
+ /* Handled via transfer pending / completion queue. */
+ QETH_QDIO_BUF_HANDLED_DELAYED,
+};
+
struct qeth_qdio_out_buffer {
struct qdio_buffer *buffer;
atomic_t state;
u64 rx_errors;
u64 rx_dropped;
u64 rx_multicast;
- u64 tx_errors;
};
struct qeth_out_q_stats {
u64 skbs_linearized_fail;
u64 tso_bytes;
u64 packing_mode_switch;
+ u64 stopped;
/* rtnl_link_stats64 */
u64 tx_packets;
struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
struct qdio_outbuf_state *bufstates; /* convenience pointer */
struct qeth_out_q_stats stats;
- int queue_no;
+ u8 next_buf_to_fill;
+ u8 max_elements;
+ u8 queue_no;
+ u8 do_pack;
struct qeth_card *card;
atomic_t state;
- int do_pack;
- /*
- * index of buffer to be filled by driver; state EMPTY or PACKING
- */
- int next_buf_to_fill;
/*
* number of buffers that are currently filled (PRIMED)
* -> these buffers are hardware-owned
atomic_t set_pci_flags_count;
};
+static inline bool qeth_out_queue_is_full(struct qeth_qdio_out_q *queue)
+{
+ return atomic_read(&queue->used_buffers) >= QDIO_MAX_BUFFERS_PER_Q;
+}
+
struct qeth_qdio_info {
atomic_t state;
/* input */
__u32 pdu_hdr;
__u32 pdu_hdr_ack;
__u16 ipa;
- __u32 pkt_seqno;
};
struct qeth_reply {
__u16 func_level;
char mcl_level[QETH_MCL_LENGTH + 1];
u8 open_when_online:1;
- int guestlan;
+ u8 is_vm_nic:1;
int mac_bits;
enum qeth_card_types type;
enum qeth_link_types link_type;
return dev->netdev_ops != NULL;
}
+static inline u16 qeth_iqd_translate_txq(struct net_device *dev, u16 txq)
+{
+ if (txq == QETH_IQD_MCAST_TXQ)
+ return dev->num_tx_queues - 1;
+ if (txq == dev->num_tx_queues - 1)
+ return QETH_IQD_MCAST_TXQ;
+ return txq;
+}
+
static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
unsigned int elements)
{
}
}
+static inline int qeth_get_ether_cast_type(struct sk_buff *skb)
+{
+ u8 *addr = eth_hdr(skb)->h_dest;
+
+ if (is_multicast_ether_addr(addr))
+ return is_broadcast_ether_addr(addr) ? RTN_BROADCAST :
+ RTN_MULTICAST;
+ return RTN_UNICAST;
+}
+
static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
u8 flags)
{
data, QETH_PROT_IPV6);
}
-int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
- int ipv);
-static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card,
- struct sk_buff *skb,
- int ipv, int cast_type)
-{
- if (IS_IQD(card) && cast_type != RTN_UNICAST)
- return card->qdio.out_qs[card->qdio.no_out_queues - 1];
- if (!card->qdio.do_prio_queueing)
- return card->qdio.out_qs[card->qdio.default_out_queue];
- return card->qdio.out_qs[qeth_get_priority_queue(card, skb, ipv)];
-}
+int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb);
extern struct qeth_discipline qeth_l2_discipline;
extern struct qeth_discipline qeth_l3_discipline;
int qeth_qdio_clear_card(struct qeth_card *, int);
void qeth_clear_working_pool_list(struct qeth_card *);
void qeth_clear_cmd_buffers(struct qeth_channel *);
-void qeth_clear_qdio_buffers(struct qeth_card *);
+void qeth_drain_output_queues(struct qeth_card *card);
void qeth_setadp_promisc_mode(struct qeth_card *);
int qeth_setadpparms_change_macaddr(struct qeth_card *);
void qeth_tx_timeout(struct net_device *);
struct net_device *dev,
netdev_features_t features);
void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats);
+u16 qeth_iqd_select_queue(struct net_device *dev, struct sk_buff *skb,
+ u8 cast_type, struct net_device *sb_dev);
int qeth_open(struct net_device *dev);
int qeth_stop(struct net_device *dev);
static struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *);
static void qeth_free_buffer_pool(struct qeth_card *);
static int qeth_qdio_establish(struct qeth_card *);
-static void qeth_free_qdio_buffers(struct qeth_card *);
+static void qeth_free_qdio_queues(struct qeth_card *card);
static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
struct qeth_qdio_out_buffer *buf,
enum iucv_tx_notify notification);
static const char *qeth_get_cardname(struct qeth_card *card)
{
- if (card->info.guestlan) {
+ if (IS_VM_NIC(card)) {
switch (card->info.type) {
case QETH_CARD_TYPE_OSD:
return " Virtual NIC QDIO";
/* max length to be returned: 14 */
const char *qeth_get_cardname_short(struct qeth_card *card)
{
- if (card->info.guestlan) {
+ if (IS_VM_NIC(card)) {
switch (card->info.type) {
case QETH_CARD_TYPE_OSD:
return "Virt.NIC QDIO";
qeth_release_skbs(buf);
- for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) {
+ for (i = 0; i < queue->max_elements; ++i) {
if (buf->buffer->element[i].addr && buf->is_header[i])
kmem_cache_free(qeth_core_header_cache,
buf->buffer->element[i].addr);
buf->is_header[i] = 0;
}
- qeth_scrub_qdio_buffer(buf->buffer,
- QETH_MAX_BUFFER_ELEMENTS(queue->card));
+ qeth_scrub_qdio_buffer(buf->buffer, queue->max_elements);
buf->next_element_to_fill = 0;
atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
}
-static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
+static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free)
{
int j;
}
}
-void qeth_clear_qdio_buffers(struct qeth_card *card)
+void qeth_drain_output_queues(struct qeth_card *card)
{
int i;
QETH_CARD_TEXT(card, 2, "clearqdbf");
/* clear outbound buffers to free skbs */
for (i = 0; i < card->qdio.no_out_queues; ++i) {
- if (card->qdio.out_qs[i]) {
- qeth_clear_outq_buffers(card->qdio.out_qs[i], 0);
- }
+ if (card->qdio.out_qs[i])
+ qeth_drain_output_queue(card->qdio.out_qs[i], false);
}
}
-EXPORT_SYMBOL_GPL(qeth_clear_qdio_buffers);
+EXPORT_SYMBOL_GPL(qeth_drain_output_queues);
static void qeth_free_buffer_pool(struct qeth_card *card)
{
return 0;
}
-static void qeth_set_single_write_queues(struct qeth_card *card)
+static void qeth_osa_set_output_queues(struct qeth_card *card, bool single)
{
- if ((atomic_read(&card->qdio.state) != QETH_QDIO_UNINITIALIZED) &&
- (card->qdio.no_out_queues == 4))
- qeth_free_qdio_buffers(card);
+ unsigned int count = single ? 1 : card->dev->num_tx_queues;
- card->qdio.no_out_queues = 1;
- if (card->qdio.default_out_queue != 0)
- dev_info(&card->gdev->dev, "Priority Queueing not supported\n");
+ rtnl_lock();
+ netif_set_real_num_tx_queues(card->dev, count);
+ rtnl_unlock();
- card->qdio.default_out_queue = 0;
-}
+ if (card->qdio.no_out_queues == count)
+ return;
-static void qeth_set_multiple_write_queues(struct qeth_card *card)
-{
- if ((atomic_read(&card->qdio.state) != QETH_QDIO_UNINITIALIZED) &&
- (card->qdio.no_out_queues == 1)) {
- qeth_free_qdio_buffers(card);
- card->qdio.default_out_queue = 2;
- }
- card->qdio.no_out_queues = 4;
+ if (atomic_read(&card->qdio.state) != QETH_QDIO_UNINITIALIZED)
+ qeth_free_qdio_queues(card);
+
+ if (count == 1)
+ dev_info(&card->gdev->dev, "Priority Queueing not supported\n");
+
+ card->qdio.default_out_queue = single ? 0 : QETH_DEFAULT_QUEUE;
+ card->qdio.no_out_queues = count;
}
-static void qeth_update_from_chp_desc(struct qeth_card *card)
+static int qeth_update_from_chp_desc(struct qeth_card *card)
{
struct ccw_device *ccwdev;
struct channel_path_desc_fmt0 *chp_dsc;
ccwdev = card->data.ccwdev;
chp_dsc = ccw_device_get_chp_desc(ccwdev, 0);
if (!chp_dsc)
- goto out;
+ return -ENOMEM;
card->info.func_level = 0x4100 + chp_dsc->desc;
- if (card->info.type == QETH_CARD_TYPE_IQD)
- goto out;
- /* CHPP field bit 6 == 1 -> single queue */
- if ((chp_dsc->chpp & 0x02) == 0x02)
- qeth_set_single_write_queues(card);
- else
- qeth_set_multiple_write_queues(card);
-out:
+ if (IS_OSD(card) || IS_OSX(card))
+ /* CHPP field bit 6 == 1 -> single queue */
+ qeth_osa_set_output_queues(card, chp_dsc->chpp & 0x02);
+
kfree(chp_dsc);
QETH_DBF_TEXT_(SETUP, 2, "nr:%x", card->qdio.no_out_queues);
QETH_DBF_TEXT_(SETUP, 2, "lvl:%02x", card->info.func_level);
+ return 0;
}
static void qeth_init_qdio_info(struct qeth_card *card)
atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED);
card->qdio.do_prio_queueing = QETH_PRIOQ_DEFAULT;
card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
- card->qdio.no_out_queues = QETH_MAX_QUEUES;
/* inbound */
card->qdio.no_in_queues = 1;
card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
- if (card->info.type == QETH_CARD_TYPE_IQD)
+ if (IS_IQD(card))
card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT;
else
card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_DEFAULT;
switch (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ESTABLISHED,
QETH_QDIO_CLEANING)) {
case QETH_QDIO_ESTABLISHED:
- if (card->info.type == QETH_CARD_TYPE_IQD)
+ if (IS_IQD(card))
rc = qdio_shutdown(CARD_DDEV(card),
QDIO_FLAG_CLEANUP_USING_HALT);
else
card->info.chpid = prcd[30];
card->info.unit_addr2 = prcd[31];
card->info.cula = prcd[63];
- card->info.guestlan = ((prcd[0x10] == _ascebc['V']) &&
- (prcd[0x11] == _ascebc['M']));
+ card->info.is_vm_nic = ((prcd[0x10] == _ascebc['V']) &&
+ (prcd[0x11] == _ascebc['M']));
}
static enum qeth_discipline_id qeth_vm_detect_layer(struct qeth_card *card)
{
enum qeth_discipline_id disc = QETH_DISCIPLINE_UNDETERMINED;
- if (card->info.type == QETH_CARD_TYPE_OSM ||
- card->info.type == QETH_CARD_TYPE_OSN)
+ if (IS_OSM(card) || IS_OSN(card))
disc = QETH_DISCIPLINE_LAYER2;
- else if (card->info.guestlan)
- disc = (card->info.type == QETH_CARD_TYPE_IQD) ?
- QETH_DISCIPLINE_LAYER3 :
- qeth_vm_detect_layer(card);
+ else if (IS_VM_NIC(card))
+ disc = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 :
+ qeth_vm_detect_layer(card);
switch (disc) {
case QETH_DISCIPLINE_LAYER2:
/* adjust RX buffer size to new max MTU: */
card->qdio.in_buf_size = max_mtu + 2 * PAGE_SIZE;
if (dev->max_mtu && dev->max_mtu != max_mtu)
- qeth_free_qdio_buffers(card);
+ qeth_free_qdio_queues(card);
} else {
if (dev->mtu)
new_mtu = dev->mtu;
memcpy(&card->token.ulp_filter_r,
QETH_ULP_ENABLE_RESP_FILTER_TOKEN(iob->data),
QETH_MPC_TOKEN_LENGTH);
- if (card->info.type == QETH_CARD_TYPE_IQD) {
+ if (IS_IQD(card)) {
memcpy(&framesize, QETH_ULP_ENABLE_RESP_MAX_MTU(iob->data), 2);
mtu = qeth_get_mtu_outof_framesize(framesize);
} else {
if (!q)
return;
- qeth_clear_outq_buffers(q, 1);
+ qeth_drain_output_queue(q, true);
qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
kfree(q);
}
-static struct qeth_qdio_out_q *qeth_alloc_qdio_out_buf(void)
+static struct qeth_qdio_out_q *qeth_alloc_output_queue(void)
{
struct qeth_qdio_out_q *q = kzalloc(sizeof(*q), GFP_KERNEL);
return q;
}
-static int qeth_alloc_qdio_buffers(struct qeth_card *card)
+static int qeth_alloc_qdio_queues(struct qeth_card *card)
{
int i, j;
/* outbound */
for (i = 0; i < card->qdio.no_out_queues; ++i) {
- card->qdio.out_qs[i] = qeth_alloc_qdio_out_buf();
+ card->qdio.out_qs[i] = qeth_alloc_output_queue();
if (!card->qdio.out_qs[i])
goto out_freeoutq;
QETH_DBF_TEXT_(SETUP, 2, "outq %i", i);
QETH_DBF_HEX(SETUP, 2, &card->qdio.out_qs[i], sizeof(void *));
+ card->qdio.out_qs[i]->card = card;
card->qdio.out_qs[i]->queue_no = i;
/* give outbound qeth_qdio_buffers their qdio_buffers */
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
return -ENOMEM;
}
-static void qeth_free_qdio_buffers(struct qeth_card *card)
+static void qeth_free_qdio_queues(struct qeth_card *card)
{
int i, j;
QETH_DBF_TEXT_(SETUP, 2, "5err%d", rc);
goto out_qdio;
}
- rc = qeth_alloc_qdio_buffers(card);
+ rc = qeth_alloc_qdio_queues(card);
if (rc) {
QETH_DBF_TEXT_(SETUP, 2, "5err%d", rc);
goto out_qdio;
rc = qeth_qdio_establish(card);
if (rc) {
QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc);
- qeth_free_qdio_buffers(card);
+ qeth_free_qdio_queues(card);
goto out_qdio;
}
rc = qeth_qdio_activate(card);
return 0;
out_qdio:
- qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD);
+ qeth_qdio_clear_card(card, !IS_IQD(card));
qdio_free(CARD_DDEV(card));
return rc;
}
}
/* fallthrough */
case QETH_CARD_TYPE_IQD:
- if ((card->info.guestlan) ||
- (card->info.mcl_level[0] & 0x80)) {
+ if (IS_VM_NIC(card) || (card->info.mcl_level[0] & 0x80)) {
card->info.mcl_level[0] = (char) _ebcasc[(__u8)
card->info.mcl_level[0]];
card->info.mcl_level[1] = (char) _ebcasc[(__u8)
int qeth_init_qdio_queues(struct qeth_card *card)
{
- int i, j;
+ unsigned int i;
int rc;
QETH_DBF_TEXT(SETUP, 2, "initqdqs");
/* outbound queue */
for (i = 0; i < card->qdio.no_out_queues; ++i) {
- qdio_reset_buffers(card->qdio.out_qs[i]->qdio_bufs,
- QDIO_MAX_BUFFERS_PER_Q);
- for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
- qeth_clear_output_buffer(card->qdio.out_qs[i],
- card->qdio.out_qs[i]->bufs[j]);
- }
- card->qdio.out_qs[i]->card = card;
- card->qdio.out_qs[i]->next_buf_to_fill = 0;
- card->qdio.out_qs[i]->do_pack = 0;
- atomic_set(&card->qdio.out_qs[i]->used_buffers, 0);
- atomic_set(&card->qdio.out_qs[i]->set_pci_flags_count, 0);
- atomic_set(&card->qdio.out_qs[i]->state,
- QETH_OUT_Q_UNLOCKED);
+ struct qeth_qdio_out_q *queue = card->qdio.out_qs[i];
+
+ qdio_reset_buffers(queue->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+ queue->max_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+ queue->next_buf_to_fill = 0;
+ queue->do_pack = 0;
+ atomic_set(&queue->used_buffers, 0);
+ atomic_set(&queue->set_pci_flags_count, 0);
+ atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
}
return 0;
}
int sbalf15 = buffer->buffer->element[15].sflags;
QETH_CARD_TEXT(card, 6, "hdsnderr");
- if (card->info.type == QETH_CARD_TYPE_IQD) {
+ if (IS_IQD(card)) {
if (sbalf15 == 0) {
qdio_err = 0;
} else {
if (queue->bufstates)
queue->bufstates[bidx].user = buf;
- if (queue->card->info.type == QETH_CARD_TYPE_IQD)
+ if (IS_IQD(queue->card))
continue;
if (!queue->do_pack) {
}
QETH_TXQ_STAT_ADD(queue, bufs, count);
- netif_trans_update(queue->card->dev);
qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
if (atomic_read(&queue->set_pci_flags_count))
qdio_flags |= QDIO_FLAG_PCI_OUT;
- atomic_add(count, &queue->used_buffers);
rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
queue->queue_no, index, count);
if (rc) {
* do_send_packet. So, we check if there is a
* packing buffer to be flushed here.
*/
- netif_stop_queue(queue->card->dev);
index = queue->next_buf_to_fill;
q_was_packing = queue->do_pack;
/* queue->do_pack may change */
goto out;
}
- qeth_free_qdio_buffers(card);
+ qeth_free_qdio_queues(card);
card->options.cq = cq;
rc = 0;
}
QETH_CARD_TEXT_(card, 5, "qcqherr%d", qdio_err);
if (qdio_err) {
- netif_stop_queue(card->dev);
+ netif_tx_stop_all_queues(card->dev);
qeth_schedule_recovery(card);
return;
}
struct qeth_card *card = (struct qeth_card *) card_ptr;
struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
struct qeth_qdio_out_buffer *buffer;
+ struct net_device *dev = card->dev;
+ struct netdev_queue *txq;
int i;
QETH_CARD_TEXT(card, 6, "qdouhdl");
if (qdio_error & QDIO_ERROR_FATAL) {
QETH_CARD_TEXT(card, 2, "achkcond");
- netif_stop_queue(card->dev);
+ netif_tx_stop_all_queues(dev);
qeth_schedule_recovery(card);
return;
}
/* prepare the queue slot for re-use: */
qeth_scrub_qdio_buffer(buffer->buffer,
- QETH_MAX_BUFFER_ELEMENTS(card));
+ queue->max_elements);
if (qeth_init_qdio_out_buf(queue, bidx)) {
QETH_CARD_TEXT(card, 2, "outofbuf");
qeth_schedule_recovery(card);
}
atomic_sub(count, &queue->used_buffers);
/* check if we need to do something on this outbound queue */
- if (card->info.type != QETH_CARD_TYPE_IQD)
+ if (!IS_IQD(card))
qeth_check_outbound_queue(queue);
- netif_wake_queue(queue->card->dev);
-}
-
-/* We cannot use outbound queue 3 for unicast packets on HiperSockets */
-static inline int qeth_cut_iqd_prio(struct qeth_card *card, int queue_num)
-{
- if ((card->info.type == QETH_CARD_TYPE_IQD) && (queue_num == 3))
- return 2;
- return queue_num;
+ if (IS_IQD(card))
+ __queue = qeth_iqd_translate_txq(dev, __queue);
+ txq = netdev_get_tx_queue(dev, __queue);
+ /* xmit may have observed the full-condition, but not yet stopped the
+ * txq. In which case the code below won't trigger. So before returning,
+ * xmit will re-check the txq's fill level and wake it up if needed.
+ */
+ if (netif_tx_queue_stopped(txq) && !qeth_out_queue_is_full(queue))
+ netif_tx_wake_queue(txq);
}
/**
* Note: Function assumes that we have 4 outbound queues.
*/
-int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
- int ipv)
+int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb)
{
- __be16 *tci;
+ struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
u8 tos;
switch (card->qdio.do_prio_queueing) {
case QETH_PRIO_Q_ING_TOS:
case QETH_PRIO_Q_ING_PREC:
- switch (ipv) {
+ switch (qeth_get_ip_version(skb)) {
case 4:
tos = ipv4_get_dsfield(ip_hdr(skb));
break;
return card->qdio.default_out_queue;
}
if (card->qdio.do_prio_queueing == QETH_PRIO_Q_ING_PREC)
- return qeth_cut_iqd_prio(card, ~tos >> 6 & 3);
+ return ~tos >> 6 & 3;
if (tos & IPTOS_MINCOST)
- return qeth_cut_iqd_prio(card, 3);
+ return 3;
if (tos & IPTOS_RELIABILITY)
return 2;
if (tos & IPTOS_THROUGHPUT)
case QETH_PRIO_Q_ING_SKB:
if (skb->priority > 5)
return 0;
- return qeth_cut_iqd_prio(card, ~skb->priority >> 1 & 3);
+ return ~skb->priority >> 1 & 3;
case QETH_PRIO_Q_ING_VLAN:
- tci = &((struct ethhdr *)skb->data)->h_proto;
- if (be16_to_cpu(*tci) == ETH_P_8021Q)
- return qeth_cut_iqd_prio(card,
- ~be16_to_cpu(*(tci + 1)) >> (VLAN_PRIO_SHIFT + 1) & 3);
+ if (veth->h_vlan_proto == htons(ETH_P_8021Q))
+ return ~ntohs(veth->h_vlan_TCI) >>
+ (VLAN_PRIO_SHIFT + 1) & 3;
break;
default:
break;
unsigned int hdr_len, unsigned int proto_len,
unsigned int *elements)
{
- const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(queue->card);
const unsigned int contiguous = proto_len ? proto_len : 1;
+ const unsigned int max_elements = queue->max_elements;
unsigned int __elements;
addr_t start, end;
bool push_ok;
* from qeth_core_header_cache.
* @offset: when mapping the skb, start at skb->data + offset
* @hd_len: if > 0, build a dedicated header element of this size
+ * flush: Prepare the buffer to be flushed, regardless of its fill level.
*/
static int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
struct qeth_qdio_out_buffer *buf,
struct sk_buff *skb, struct qeth_hdr *hdr,
- unsigned int offset, unsigned int hd_len)
+ unsigned int offset, unsigned int hd_len,
+ bool flush)
{
struct qdio_buffer *buffer = buf->buffer;
bool is_first_elem = true;
QETH_TXQ_STAT_INC(queue, skbs_pack);
/* If the buffer still has free elements, keep using it. */
- if (buf->next_element_to_fill <
- QETH_MAX_BUFFER_ELEMENTS(queue->card))
+ if (!flush &&
+ buf->next_element_to_fill < queue->max_elements)
return 0;
}
{
int index = queue->next_buf_to_fill;
struct qeth_qdio_out_buffer *buffer = queue->bufs[index];
+ struct netdev_queue *txq;
+ bool stopped = false;
- /*
- * check if buffer is empty to make sure that we do not 'overtake'
- * ourselves and try to fill a buffer that is already primed
+ /* Just a sanity check, the wake/stop logic should ensure that we always
+ * get a free buffer.
*/
if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
return -EBUSY;
- qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
+
+ txq = netdev_get_tx_queue(queue->card->dev, skb_get_queue_mapping(skb));
+
+ if (atomic_inc_return(&queue->used_buffers) >= QDIO_MAX_BUFFERS_PER_Q) {
+ /* If a TX completion happens right _here_ and misses to wake
+ * the txq, then our re-check below will catch the race.
+ */
+ QETH_TXQ_STAT_INC(queue, stopped);
+ netif_tx_stop_queue(txq);
+ stopped = true;
+ }
+
+ qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len, stopped);
qeth_flush_buffers(queue, index, 1);
+
+ if (stopped && !qeth_out_queue_is_full(queue))
+ netif_tx_start_queue(txq);
return 0;
}
int elements_needed)
{
struct qeth_qdio_out_buffer *buffer;
+ struct netdev_queue *txq;
+ bool stopped = false;
int start_index;
int flush_count = 0;
int do_pack = 0;
QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
start_index = queue->next_buf_to_fill;
buffer = queue->bufs[queue->next_buf_to_fill];
- /*
- * check if buffer is empty to make sure that we do not 'overtake'
- * ourselves and try to fill a buffer that is already primed
+
+ /* Just a sanity check, the wake/stop logic should ensure that we always
+ * get a free buffer.
*/
if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) {
atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
return -EBUSY;
}
+
+ txq = netdev_get_tx_queue(card->dev, skb_get_queue_mapping(skb));
+
/* check if we need to switch packing state of this queue */
qeth_switch_to_packing_if_needed(queue);
if (queue->do_pack) {
do_pack = 1;
/* does packet fit in current buffer? */
- if ((QETH_MAX_BUFFER_ELEMENTS(card) -
- buffer->next_element_to_fill) < elements_needed) {
+ if (buffer->next_element_to_fill + elements_needed >
+ queue->max_elements) {
/* ... no -> set state PRIMED */
atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
flush_count++;
(queue->next_buf_to_fill + 1) %
QDIO_MAX_BUFFERS_PER_Q;
buffer = queue->bufs[queue->next_buf_to_fill];
- /* we did a step forward, so check buffer state
- * again */
+
+ /* We stepped forward, so sanity-check again: */
if (atomic_read(&buffer->state) !=
QETH_QDIO_BUF_EMPTY) {
qeth_flush_buffers(queue, start_index,
}
}
- flush_count += qeth_fill_buffer(queue, buffer, skb, hdr, offset,
- hd_len);
+ if (buffer->next_element_to_fill == 0 &&
+ atomic_inc_return(&queue->used_buffers) >= QDIO_MAX_BUFFERS_PER_Q) {
+ /* If a TX completion happens right _here_ and misses to wake
+ * the txq, then our re-check below will catch the race.
+ */
+ QETH_TXQ_STAT_INC(queue, stopped);
+ netif_tx_stop_queue(txq);
+ stopped = true;
+ }
+
+ flush_count += qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len,
+ stopped);
if (flush_count)
qeth_flush_buffers(queue, start_index, flush_count);
else if (!atomic_read(&queue->set_pci_flags_count))
if (do_pack)
QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_count);
+ if (stopped && !qeth_out_queue_is_full(queue))
+ netif_tx_start_queue(txq);
return rc;
}
EXPORT_SYMBOL_GPL(qeth_do_send_packet);
} else {
if (!push_len)
kmem_cache_free(qeth_core_header_cache, hdr);
- if (rc == -EBUSY)
- /* roll back to ETH header */
- skb_pull(skb, push_len);
}
return rc;
}
QETH_CARD_TEXT(card, 4, "setactlo");
- if ((card->info.type == QETH_CARD_TYPE_OSD ||
- card->info.type == QETH_CARD_TYPE_OSX) &&
- qeth_adp_supported(card, IPA_SETADP_SET_ACCESS_CONTROL)) {
+ if ((IS_OSD(card) || IS_OSX(card)) &&
+ qeth_adp_supported(card, IPA_SETADP_SET_ACCESS_CONTROL)) {
rc = qeth_setadpparms_set_access_ctrl(card,
card->options.isolation, fallback);
if (rc) {
card = dev->ml_priv;
QETH_CARD_TEXT(card, 4, "txtimeo");
- QETH_CARD_STAT_INC(card, tx_errors);
qeth_schedule_recovery(card);
}
EXPORT_SYMBOL_GPL(qeth_tx_timeout);
QETH_CARD_TEXT(card, 3, "snmpcmd");
- if (card->info.guestlan)
+ if (IS_VM_NIC(card))
return -EOPNOTSUPP;
if ((!qeth_adp_supported(card, IPA_SETADP_SET_SNMP_CONTROL)) &&
}
EXPORT_SYMBOL_GPL(qeth_vm_request_mac);
-static int qeth_get_qdio_q_format(struct qeth_card *card)
-{
- if (card->info.type == QETH_CARD_TYPE_IQD)
- return QDIO_IQDIO_QFMT;
- else
- return QDIO_QETH_QFMT;
-}
-
static void qeth_determine_capabilities(struct qeth_card *card)
{
int rc;
memset(&init_data, 0, sizeof(struct qdio_initialize));
init_data.cdev = CARD_DDEV(card);
- init_data.q_format = qeth_get_qdio_q_format(card);
+ init_data.q_format = IS_IQD(card) ? QDIO_IQDIO_QFMT :
+ QDIO_QETH_QFMT;
init_data.qib_param_field_format = 0;
init_data.qib_param_field = qib_param_field;
init_data.no_input_qs = card->qdio.no_in_queues;
init_data.input_sbal_addr_array = in_sbal_ptrs;
init_data.output_sbal_addr_array = out_sbal_ptrs;
init_data.output_sbal_state_array = card->qdio.out_bufstates;
- init_data.scan_threshold =
- (card->info.type == QETH_CARD_TYPE_IQD) ? 1 : 32;
+ init_data.scan_threshold = IS_IQD(card) ? 1 : 32;
if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED,
QETH_QDIO_ESTABLISHED) == QETH_QDIO_ALLOCATED) {
qeth_clean_channel(&card->write);
qeth_clean_channel(&card->data);
destroy_workqueue(card->event_wq);
- qeth_free_qdio_buffers(card);
+ qeth_free_qdio_queues(card);
unregister_service_level(&card->qeth_service_level);
dev_set_drvdata(&card->gdev->dev, NULL);
kfree(card);
QETH_DBF_TEXT(SETUP, 2, "hrdsetup");
atomic_set(&card->force_alloc_skb, 0);
- qeth_update_from_chp_desc(card);
+ rc = qeth_update_from_chp_desc(card);
+ if (rc)
+ return rc;
retry:
if (retries < 3)
QETH_DBF_MESSAGE(2, "Retrying to do IDX activates on device %x.\n",
CARD_DEVID(card));
- rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD);
+ rc = qeth_qdio_clear_card(card, !IS_IQD(card));
ccw_device_set_offline(CARD_DDEV(card));
ccw_device_set_offline(CARD_WDEV(card));
ccw_device_set_offline(CARD_RDEV(card));
return NULL;
if (((skb_len >= card->options.rx_sg_cb) &&
- (!(card->info.type == QETH_CARD_TYPE_OSN)) &&
+ !IS_OSN(card) &&
(!atomic_read(&card->force_alloc_skb))) ||
(card->options.cq == QETH_CQ_ENABLED))
use_rx_sg = 1;
switch (card->info.type) {
case QETH_CARD_TYPE_IQD:
- dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN, ether_setup);
+ dev = alloc_netdev_mqs(0, "hsi%d", NET_NAME_UNKNOWN,
+ ether_setup, QETH_MAX_QUEUES, 1);
+ break;
+ case QETH_CARD_TYPE_OSM:
+ dev = alloc_etherdev(0);
break;
case QETH_CARD_TYPE_OSN:
dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, ether_setup);
break;
default:
- dev = alloc_etherdev(0);
+ dev = alloc_etherdev_mqs(0, QETH_MAX_QUEUES, 1);
}
if (!dev)
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->hw_features |= NETIF_F_SG;
dev->vlan_features |= NETIF_F_SG;
- if (IS_IQD(card))
+ if (IS_IQD(card)) {
+ netif_set_real_num_tx_queues(dev, QETH_IQD_MIN_TXQ);
dev->features |= NETIF_F_SG;
+ }
}
return dev;
}
qeth_setup_card(card);
- qeth_update_from_chp_desc(card);
-
card->dev = qeth_alloc_netdev(card);
if (!card->dev) {
rc = -ENOMEM;
goto err_card;
}
+ card->qdio.no_out_queues = card->dev->num_tx_queues;
+ rc = qeth_update_from_chp_desc(card);
+ if (rc)
+ goto err_chp_desc;
qeth_determine_capabilities(card);
enforced_disc = qeth_enforce_discipline(card);
switch (enforced_disc) {
if (rc)
goto err_load;
- gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN)
- ? card->discipline->devtype
- : &qeth_osn_devtype;
+ gdev->dev.type = IS_OSN(card) ? &qeth_osn_devtype :
+ card->discipline->devtype;
rc = card->discipline->setup(card->gdev);
if (rc)
goto err_disc;
err_disc:
qeth_core_free_discipline(card);
err_load:
+err_chp_desc:
free_netdev(card->dev);
err_card:
qeth_core_free_card(card);
enum qeth_discipline_id def_discipline;
if (!card->discipline) {
- if (card->info.type == QETH_CARD_TYPE_IQD)
- def_discipline = QETH_DISCIPLINE_LAYER3;
- else
- def_discipline = QETH_DISCIPLINE_LAYER2;
+ def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 :
+ QETH_DISCIPLINE_LAYER2;
rc = qeth_core_load_discipline(card, def_discipline);
if (rc)
goto err;
if ((gdev->state == CCWGROUP_ONLINE) && card->info.hwtrap)
qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
qeth_qdio_clear_card(card, 0);
- qeth_clear_qdio_buffers(card);
+ qeth_drain_output_queues(card);
qdio_free(CARD_DDEV(card));
}
rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data);
break;
case SIOC_QETH_GET_CARD_TYPE:
- if ((card->info.type == QETH_CARD_TYPE_OSD ||
- card->info.type == QETH_CARD_TYPE_OSM ||
- card->info.type == QETH_CARD_TYPE_OSX) &&
- !card->info.guestlan)
+ if ((IS_OSD(card) || IS_OSM(card) || IS_OSX(card)) &&
+ !IS_VM_NIC(card))
return 1;
- else
- return 0;
+ return 0;
case SIOCGMIIPHY:
mii_data = if_mii(rq);
mii_data->phy_id = 0;
stats->rx_errors = card->stats.rx_errors;
stats->rx_dropped = card->stats.rx_dropped;
stats->multicast = card->stats.rx_multicast;
- stats->tx_errors = card->stats.tx_errors;
for (i = 0; i < card->qdio.no_out_queues; i++) {
queue = card->qdio.out_qs[i];
}
EXPORT_SYMBOL_GPL(qeth_get_stats64);
+u16 qeth_iqd_select_queue(struct net_device *dev, struct sk_buff *skb,
+ u8 cast_type, struct net_device *sb_dev)
+{
+ if (cast_type != RTN_UNICAST)
+ return QETH_IQD_MCAST_TXQ;
+ return QETH_IQD_MIN_UCAST_TXQ;
+}
+EXPORT_SYMBOL_GPL(qeth_iqd_select_queue);
+
int qeth_open(struct net_device *dev)
{
struct qeth_card *card = dev->ml_priv;
return -EIO;
card->data.state = CH_STATE_UP;
- netif_start_queue(dev);
+ netif_tx_start_all_queues(dev);
napi_enable(&card->napi);
local_bh_disable();
#define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM)
#define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN)
#define IS_OSX(card) ((card)->info.type == QETH_CARD_TYPE_OSX)
-#define IS_VM_NIC(card) ((card)->info.guestlan)
+#define IS_VM_NIC(card) ((card)->info.is_vm_nic)
#define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18
/* only the first two bytes are looked at in qeth_get_cardname_short */
if (!card)
return -EINVAL;
+ if (IS_IQD(card))
+ return -EOPNOTSUPP;
+
mutex_lock(&card->conf_mutex);
if (card->state != CARD_STATE_DOWN) {
rc = -EPERM;
card->qdio.do_prio_queueing = QETH_NO_PRIO_QUEUEING;
card->qdio.default_out_queue = 2;
} else if (sysfs_streq(buf, "no_prio_queueing:3")) {
- if (card->info.type == QETH_CARD_TYPE_IQD) {
- rc = -EPERM;
- goto out;
- }
card->qdio.do_prio_queueing = QETH_NO_PRIO_QUEUEING;
card->qdio.default_out_queue = 3;
} else if (sysfs_streq(buf, "no_prio_queueing")) {
return -EINVAL;
mutex_lock(&card->conf_mutex);
- if (card->info.type != QETH_CARD_TYPE_OSD &&
- card->info.type != QETH_CARD_TYPE_OSX) {
+ if (!IS_OSD(card) && !IS_OSX(card)) {
rc = -EOPNOTSUPP;
dev_err(&card->gdev->dev, "Adapter does not "
"support QDIO data connection isolation\n");
QETH_TXQ_STAT("linearized+error skbs", skbs_linearized_fail),
QETH_TXQ_STAT("TSO bytes", tso_bytes),
QETH_TXQ_STAT("Packing mode switches", packing_mode_switch),
+ QETH_TXQ_STAT("Queue stopped", stopped),
};
static const struct qeth_stats card_stats[] = {
CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card));
}
+static void qeth_get_channels(struct net_device *dev,
+ struct ethtool_channels *channels)
+{
+ struct qeth_card *card = dev->ml_priv;
+
+ channels->max_rx = dev->num_rx_queues;
+ channels->max_tx = card->qdio.no_out_queues;
+ channels->max_other = 0;
+ channels->max_combined = 0;
+ channels->rx_count = dev->real_num_rx_queues;
+ channels->tx_count = dev->real_num_tx_queues;
+ channels->other_count = 0;
+ channels->combined_count = 0;
+}
+
/* Helper function to fill 'advertising' and 'supported' which are the same. */
/* Autoneg and full-duplex are supported and advertised unconditionally. */
/* Always advertise and support all speeds up to specified, and only one */
.get_ethtool_stats = qeth_get_ethtool_stats,
.get_sset_count = qeth_get_sset_count,
.get_drvinfo = qeth_get_drvinfo,
+ .get_channels = qeth_get_channels,
.get_link_ksettings = qeth_get_link_ksettings,
};
}
}
-static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
-{
- if (card->info.type == QETH_CARD_TYPE_OSN)
- return RTN_UNICAST;
- if (is_broadcast_ether_addr(skb->data))
- return RTN_BROADCAST;
- if (is_multicast_ether_addr(skb->data))
- return RTN_MULTICAST;
- return RTN_UNICAST;
-}
-
static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
int ipv, int cast_type, unsigned int data_len)
}
if (card->state == CARD_STATE_HARDSETUP) {
qeth_qdio_clear_card(card, 0);
- qeth_clear_qdio_buffers(card);
+ qeth_drain_output_queues(card);
qeth_clear_working_pool_list(card);
card->state = CARD_STATE_DOWN;
}
case QETH_HEADER_TYPE_LAYER2:
skb->protocol = eth_type_trans(skb, skb->dev);
qeth_rx_csum(card, skb, hdr->hdr.l2.flags[1]);
- if (skb->protocol == htons(ETH_P_802_2))
- *((__u32 *)skb->cb) = ++card->seqno.pkt_seqno;
len = skb->len;
napi_gro_receive(&card->napi, skb);
break;
case QETH_HEADER_TYPE_OSN:
- if (card->info.type == QETH_CARD_TYPE_OSN) {
+ if (IS_OSN(card)) {
skb_push(skb, sizeof(struct qeth_hdr));
skb_copy_to_linear_data(skb, hdr,
sizeof(struct qeth_hdr));
}
/* some devices don't support a custom MAC address: */
- if (card->info.type == QETH_CARD_TYPE_OSM ||
- card->info.type == QETH_CARD_TYPE_OSX)
+ if (IS_OSM(card) || IS_OSX(card))
return (rc) ? rc : -EADDRNOTAVAIL;
eth_hw_addr_random(card->dev);
}
elements += qeth_count_elements(skb, hd_len);
- if (elements > QETH_MAX_BUFFER_ELEMENTS(card)) {
+ if (elements > queue->max_elements) {
rc = -E2BIG;
goto out;
}
struct net_device *dev)
{
struct qeth_card *card = dev->ml_priv;
- int cast_type = qeth_l2_get_cast_type(card, skb);
- int ipv = qeth_get_ip_version(skb);
+ u16 txq = skb_get_queue_mapping(skb);
struct qeth_qdio_out_q *queue;
int tx_bytes = skb->len;
int rc;
- queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
-
- netif_stop_queue(dev);
+ if (IS_IQD(card))
+ txq = qeth_iqd_translate_txq(dev, txq);
+ queue = card->qdio.out_qs[txq];
if (IS_OSN(card))
rc = qeth_l2_xmit_osn(card, skb, queue);
else
- rc = qeth_xmit(card, skb, queue, ipv, cast_type,
+ rc = qeth_xmit(card, skb, queue, qeth_get_ip_version(skb),
+ qeth_get_ether_cast_type(skb),
qeth_l2_fill_header);
if (!rc) {
QETH_TXQ_STAT_INC(queue, tx_packets);
QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes);
- netif_wake_queue(dev);
return NETDEV_TX_OK;
- } else if (rc == -EBUSY) {
- return NETDEV_TX_BUSY;
- } /* else fall through */
+ }
QETH_TXQ_STAT_INC(queue, tx_dropped);
kfree_skb(skb);
- netif_wake_queue(dev);
return NETDEV_TX_OK;
}
+static u16 qeth_l2_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct qeth_card *card = dev->ml_priv;
+
+ if (IS_IQD(card))
+ return qeth_iqd_select_queue(dev, skb,
+ qeth_get_ether_cast_type(skb),
+ sb_dev);
+ return qeth_get_priority_queue(card, skb);
+}
+
static const struct device_type qeth_l2_devtype = {
.name = "qeth_layer2",
.groups = qeth_l2_attr_groups,
.ndo_get_stats64 = qeth_get_stats64,
.ndo_start_xmit = qeth_l2_hard_start_xmit,
.ndo_features_check = qeth_features_check,
+ .ndo_select_queue = qeth_l2_select_queue,
.ndo_validate_addr = qeth_l2_validate_addr,
.ndo_set_rx_mode = qeth_l2_set_rx_mode,
.ndo_do_ioctl = qeth_do_ioctl,
card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
- if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
+ if (IS_OSD(card) && !IS_VM_NIC(card)) {
card->dev->features |= NETIF_F_SG;
/* OSA 3S and earlier has no RX/TX support */
if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
/* softsetup */
QETH_DBF_TEXT(SETUP, 2, "softsetp");
- if ((card->info.type == QETH_CARD_TYPE_OSD) ||
- (card->info.type == QETH_CARD_TYPE_OSX)) {
+ if (IS_OSD(card) || IS_OSX(card)) {
rc = qeth_l2_start_ipassists(card);
if (rc)
goto out_remove;
enum qeth_ipa_sbp_cmd sbp_cmd,
unsigned int cmd_length)
{
- enum qeth_ipa_cmds ipa_cmd = (card->info.type == QETH_CARD_TYPE_IQD) ?
- IPA_CMD_SETBRIDGEPORT_IQD :
- IPA_CMD_SETBRIDGEPORT_OSA;
+ enum qeth_ipa_cmds ipa_cmd = IS_IQD(card) ? IPA_CMD_SETBRIDGEPORT_IQD :
+ IPA_CMD_SETBRIDGEPORT_OSA;
struct qeth_cmd_buffer *iob;
struct qeth_ipa_cmd *cmd;
static int qeth_l3_correct_routing_type(struct qeth_card *card,
enum qeth_routing_types *type, enum qeth_prot_versions prot)
{
- if (card->info.type == QETH_CARD_TYPE_IQD) {
+ if (IS_IQD(card)) {
switch (*type) {
case NO_ROUTER:
case PRIMARY_CONNECTOR:
QETH_CARD_TEXT(card, 3, "softipv6");
- if (card->info.type == QETH_CARD_TYPE_IQD)
+ if (IS_IQD(card))
goto out;
rc = qeth_send_simple_setassparms(card, IPA_IPV6,
switch (hdr->hdr.l3.id) {
case QETH_HEADER_TYPE_LAYER3:
magic = *(__u16 *)skb->data;
- if ((card->info.type == QETH_CARD_TYPE_IQD) &&
- (magic == ETH_P_AF_IUCV)) {
+ if (IS_IQD(card) && magic == ETH_P_AF_IUCV) {
len = skb->len;
dev_hard_header(skb, dev, ETH_P_AF_IUCV,
dev->dev_addr, "FAKELL", len);
}
if (card->state == CARD_STATE_HARDSETUP) {
qeth_qdio_clear_card(card, 0);
- qeth_clear_qdio_buffers(card);
+ qeth_drain_output_queues(card);
qeth_clear_working_pool_list(card);
card->state = CARD_STATE_DOWN;
}
(card->info.promisc_mode == SET_PROMISC_MODE_OFF)))
return;
- if (card->info.guestlan) { /* Guestlan trace */
+ if (IS_VM_NIC(card)) { /* Guestlan trace */
if (qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE))
qeth_setadp_promisc_mode(card);
} else if (card->options.sniffer && /* HiperSockets trace */
* IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_SET_NO_ENTRIES;
* thus we say EOPNOTSUPP for this ARP function
*/
- if (card->info.guestlan)
+ if (IS_VM_NIC(card))
return -EOPNOTSUPP;
if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
return -EOPNOTSUPP;
* IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_ADD_ENTRY;
* thus we say EOPNOTSUPP for this ARP function
*/
- if (card->info.guestlan)
+ if (IS_VM_NIC(card))
return -EOPNOTSUPP;
if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
return -EOPNOTSUPP;
* IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_FLUSH_CACHE;
* thus we say EOPNOTSUPP for this ARP function
*/
- if (card->info.guestlan || (card->info.type == QETH_CARD_TYPE_IQD))
+ if (IS_VM_NIC(card) || IS_IQD(card))
return -EOPNOTSUPP;
if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
return -EOPNOTSUPP;
RTN_MULTICAST : RTN_UNICAST;
default:
/* ... and MAC address */
- if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest,
- skb->dev->broadcast))
- return RTN_BROADCAST;
- if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
- return RTN_MULTICAST;
- /* default to unicast */
- return RTN_UNICAST;
+ return qeth_get_ether_cast_type(skb);
}
}
hdr->hdr.l3.vlan_id = ntohs(veth->h_vlan_TCI);
}
+ l3_hdr->flags = qeth_l3_cast_type_to_flag(cast_type);
+
/* OSA only: */
if (!ipv) {
- hdr->hdr.l3.flags = QETH_HDR_PASSTHRU;
- if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest,
- skb->dev->broadcast))
- hdr->hdr.l3.flags |= QETH_CAST_BROADCAST;
- else
- hdr->hdr.l3.flags |= (cast_type == RTN_MULTICAST) ?
- QETH_CAST_MULTICAST : QETH_CAST_UNICAST;
+ l3_hdr->flags |= QETH_HDR_PASSTHRU;
return;
}
- hdr->hdr.l3.flags = qeth_l3_cast_type_to_flag(cast_type);
rcu_read_lock();
if (ipv == 4) {
struct rtable *rt = skb_rtable(skb);
l3_hdr->next_hop.ipv6_addr = ipv6_hdr(skb)->daddr;
hdr->hdr.l3.flags |= QETH_HDR_IPV6;
- if (card->info.type != QETH_CARD_TYPE_IQD)
+ if (!IS_IQD(card))
hdr->hdr.l3.flags |= QETH_HDR_PASSTHRU;
}
rcu_read_unlock();
static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb,
struct qeth_qdio_out_q *queue, int ipv, int cast_type)
{
- unsigned char eth_hdr[ETH_HLEN];
unsigned int hw_hdr_len;
int rc;
rc = skb_cow_head(skb, hw_hdr_len - ETH_HLEN);
if (rc)
return rc;
- skb_copy_from_linear_data(skb, eth_hdr, ETH_HLEN);
skb_pull(skb, ETH_HLEN);
qeth_l3_fixup_headers(skb);
- rc = qeth_xmit(card, skb, queue, ipv, cast_type, qeth_l3_fill_header);
- if (rc == -EBUSY) {
- /* roll back to ETH header */
- skb_push(skb, ETH_HLEN);
- skb_copy_to_linear_data(skb, eth_hdr, ETH_HLEN);
- }
- return rc;
+ return qeth_xmit(card, skb, queue, ipv, cast_type, qeth_l3_fill_header);
}
static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- int cast_type = qeth_l3_get_cast_type(skb);
struct qeth_card *card = dev->ml_priv;
+ u16 txq = skb_get_queue_mapping(skb);
int ipv = qeth_get_ip_version(skb);
struct qeth_qdio_out_q *queue;
int tx_bytes = skb->len;
- int rc;
-
- queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+ int cast_type, rc;
if (IS_IQD(card)) {
+ queue = card->qdio.out_qs[qeth_iqd_translate_txq(dev, txq)];
+
if (card->options.sniffer)
goto tx_drop;
if ((card->options.cq != QETH_CQ_ENABLED && !ipv) ||
(card->options.cq == QETH_CQ_ENABLED &&
skb->protocol != htons(ETH_P_AF_IUCV)))
goto tx_drop;
+
+ if (txq == QETH_IQD_MCAST_TXQ)
+ cast_type = qeth_l3_get_cast_type(skb);
+ else
+ cast_type = RTN_UNICAST;
+ } else {
+ queue = card->qdio.out_qs[txq];
+ cast_type = qeth_l3_get_cast_type(skb);
}
if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable)
goto tx_drop;
- netif_stop_queue(dev);
-
if (ipv == 4 || IS_IQD(card))
rc = qeth_l3_xmit(card, skb, queue, ipv, cast_type);
else
if (!rc) {
QETH_TXQ_STAT_INC(queue, tx_packets);
QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes);
- netif_wake_queue(dev);
return NETDEV_TX_OK;
- } else if (rc == -EBUSY) {
- return NETDEV_TX_BUSY;
- } /* else fall through */
+ }
tx_drop:
QETH_TXQ_STAT_INC(queue, tx_dropped);
kfree_skb(skb);
- netif_wake_queue(dev);
return NETDEV_TX_OK;
}
return qeth_features_check(skb, dev, features);
}
+static u16 qeth_l3_iqd_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ return qeth_iqd_select_queue(dev, skb, qeth_l3_get_cast_type(skb),
+ sb_dev);
+}
+
+static u16 qeth_l3_osa_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct qeth_card *card = dev->ml_priv;
+
+ return qeth_get_priority_queue(card, skb);
+}
+
static const struct net_device_ops qeth_l3_netdev_ops = {
.ndo_open = qeth_open,
.ndo_stop = qeth_stop,
.ndo_get_stats64 = qeth_get_stats64,
.ndo_start_xmit = qeth_l3_hard_start_xmit,
+ .ndo_select_queue = qeth_l3_iqd_select_queue,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = qeth_l3_set_rx_mode,
.ndo_do_ioctl = qeth_do_ioctl,
.ndo_get_stats64 = qeth_get_stats64,
.ndo_start_xmit = qeth_l3_hard_start_xmit,
.ndo_features_check = qeth_l3_osa_features_check,
+ .ndo_select_queue = qeth_l3_osa_select_queue,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = qeth_l3_set_rx_mode,
.ndo_do_ioctl = qeth_do_ioctl,
unsigned int headroom;
int rc;
- if (card->info.type == QETH_CARD_TYPE_OSD ||
- card->info.type == QETH_CARD_TYPE_OSX) {
+ if (IS_OSD(card) || IS_OSX(card)) {
if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) ||
(card->info.link_type == QETH_LINK_TYPE_HSTR)) {
pr_info("qeth_l3: ignoring TR device\n");
if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
card->dev->dev_id = card->info.unique_id & 0xffff;
- if (!card->info.guestlan) {
+ if (!IS_VM_NIC(card)) {
card->dev->features |= NETIF_F_SG;
card->dev->hw_features |= NETIF_F_TSO |
NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
headroom = sizeof(struct qeth_hdr_tso);
else
headroom = sizeof(struct qeth_hdr) + VLAN_HLEN;
- } else if (card->info.type == QETH_CARD_TYPE_IQD) {
+ } else if (IS_IQD(card)) {
card->dev->flags |= IFF_NOARP;
card->dev->netdev_ops = &qeth_l3_netdev_ops;
headroom = sizeof(struct qeth_hdr) - ETH_HLEN;
if (!card)
return -EINVAL;
- if (card->info.type != QETH_CARD_TYPE_IQD)
+ if (!IS_IQD(card))
return -EPERM;
if (card->options.cq == QETH_CQ_ENABLED)
return -EPERM;
if (!card)
return -EINVAL;
- if (card->info.type != QETH_CARD_TYPE_IQD)
+ if (!IS_IQD(card))
return -EPERM;
memcpy(tmp_hsuid, card->options.hsuid, sizeof(tmp_hsuid));
if (!card)
return -EINVAL;
- if (card->info.type != QETH_CARD_TYPE_IQD)
+ if (!IS_IQD(card))
return -EPERM;
if (card->state != CARD_STATE_DOWN)
return -EPERM;
ahc = ahc_alloc(&aic7xxx_driver_template, name);
if (ahc == NULL)
return (ENOMEM);
+ ahc->dev = dev;
error = aic7770_config(ahc, aic7770_ident_table + edev->id.driver_data,
eisaBase);
if (error != 0) {
* Platform specific device information.
*/
ahc_dev_softc_t dev_softc;
+ struct device *dev;
/*
* Bus specific device information.
ahc_dmamem_alloc(struct ahc_softc *ahc, bus_dma_tag_t dmat, void** vaddr,
int flags, bus_dmamap_t *mapp)
{
- *vaddr = pci_alloc_consistent(ahc->dev_softc,
- dmat->maxsize, mapp);
+ /* XXX: check if we really need the GFP_ATOMIC and unwind this mess! */
+ *vaddr = dma_alloc_coherent(ahc->dev, dmat->maxsize, mapp, GFP_ATOMIC);
if (*vaddr == NULL)
return ENOMEM;
return 0;
ahc_dmamem_free(struct ahc_softc *ahc, bus_dma_tag_t dmat,
void* vaddr, bus_dmamap_t map)
{
- pci_free_consistent(ahc->dev_softc, dmat->maxsize,
- vaddr, map);
+ dma_free_coherent(ahc->dev, dmat->maxsize, vaddr, map);
}
int
host->transportt = ahc_linux_transport_template;
- retval = scsi_add_host(host,
- (ahc->dev_softc ? &ahc->dev_softc->dev : NULL));
+ retval = scsi_add_host(host, ahc->dev);
if (retval) {
printk(KERN_WARNING "aic7xxx: scsi_add_host failed\n");
scsi_host_put(host);
}
}
ahc->dev_softc = pci;
+ ahc->dev = &pci->dev;
error = ahc_pci_config(ahc, entry);
if (error != 0) {
ahc_free(ahc);
}
out:
- if (req->nsge > 0)
+ if (req->nsge > 0) {
scsi_dma_unmap(cmnd);
+ if (req->dcopy && (host_status == DID_OK))
+ host_status = csio_scsi_copy_to_sgl(hw, req);
+ }
cmnd->result = (((host_status) << 16) | scsi_status);
cmnd->scsi_done(cmnd);
FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
fc_rport_state(rdata));
- rdata->flags &= ~FC_RP_STARTED;
fc_rport_enter_delete(rdata, RPORT_EV_STOP);
mutex_unlock(&rdata->rp_mutex);
kref_put(&rdata->kref, fc_rport_destroy);
* wake up the thread.
*/
spin_lock(&lpfc_cmd->buf_lock);
- if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) {
- lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
- if (lpfc_cmd->waitq)
- wake_up(lpfc_cmd->waitq);
+ lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
+ if (lpfc_cmd->waitq) {
+ wake_up(lpfc_cmd->waitq);
lpfc_cmd->waitq = NULL;
}
spin_unlock(&lpfc_cmd->buf_lock);
static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi)
{
- struct qedi_nvm_iscsi_image nvm_image;
-
qedi->iscsi_image = dma_alloc_coherent(&qedi->pdev->dev,
- sizeof(nvm_image),
+ sizeof(struct qedi_nvm_iscsi_image),
&qedi->nvm_buf_dma, GFP_KERNEL);
if (!qedi->iscsi_image) {
QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n");
static int qedi_get_boot_info(struct qedi_ctx *qedi)
{
int ret = 1;
- struct qedi_nvm_iscsi_image nvm_image;
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Get NVM iSCSI CFG image\n");
ret = qedi_ops->common->nvm_get_image(qedi->cdev,
QED_NVM_IMAGE_ISCSI_CFG,
(char *)qedi->iscsi_image,
- sizeof(nvm_image));
+ sizeof(struct qedi_nvm_iscsi_image));
if (ret)
QEDI_ERR(&qedi->dbg_ctx,
"Could not get NVM image. ret = %d\n", ret);
{"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+ {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
{"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
{"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */
{"NETAPP", "INF-01-00", "rdac", },
{"LSI", "INF-01-00", "rdac", },
{"ENGENIO", "INF-01-00", "rdac", },
+ {"LENOVO", "DE_Series", "rdac", },
{NULL, NULL, NULL },
};
ret = BLK_STS_DEV_RESOURCE;
break;
default:
+ if (unlikely(!scsi_device_online(sdev)))
+ scsi_req(req)->result = DID_NO_CONNECT << 16;
+ else
+ scsi_req(req)->result = DID_ERROR << 16;
/*
- * Make sure to release all allocated ressources when
+ * Make sure to release all allocated resources when
* we hit an error, as we will never see this command
* again.
*/
* This is the end of Protocol specific defines.
*/
-static int storvsc_ringbuffer_size = (256 * PAGE_SIZE);
+static int storvsc_ringbuffer_size = (128 * 1024);
static u32 max_outstanding_req_per_channel;
static int storvsc_vcpus_per_sub_channel = 4;
{
struct device *dev = &device->device;
struct storvsc_device *stor_device;
- int num_cpus = num_online_cpus();
int num_sc;
struct storvsc_cmd_request *request;
struct vstor_packet *vstor_packet;
int ret, t;
- num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns);
+ /*
+ * If the number of CPUs is artificially restricted, such as
+ * with maxcpus=1 on the kernel boot line, Hyper-V could offer
+ * sub-channels >= the number of CPUs. These sub-channels
+ * should not be created. The primary channel is already created
+ * and assigned to one CPU, so check against # CPUs - 1.
+ */
+ num_sc = min((int)(num_online_cpus() - 1), max_chns);
+ if (!num_sc)
+ return;
+
stor_device = get_out_stor_device(device);
if (!stor_device)
return;
/* We need to know how many queues before we allocate. */
num_queues = virtscsi_config_get(vdev, num_queues) ? : 1;
+ num_queues = min_t(unsigned int, nr_cpu_ids, num_queues);
num_targets = virtscsi_config_get(vdev, max_target) + 1;
size = usb_endpoint_maxp(devpriv->ep_tx);
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
- if (!devpriv->usb_tx_buf) {
- kfree(devpriv->usb_rx_buf);
+ if (!devpriv->usb_tx_buf)
return -ENOMEM;
- }
return 0;
}
if (!devpriv)
return -ENOMEM;
+ mutex_init(&devpriv->mut);
+ usb_set_intfdata(intf, devpriv);
+
ret = ni6501_find_endpoints(dev);
if (ret)
return ret;
if (ret)
return ret;
- mutex_init(&devpriv->mut);
- usb_set_intfdata(intf, devpriv);
-
ret = comedi_alloc_subdevices(dev, 2);
if (ret)
return ret;
size = usb_endpoint_maxp(devpriv->ep_tx);
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
- if (!devpriv->usb_tx_buf) {
- kfree(devpriv->usb_rx_buf);
+ if (!devpriv->usb_tx_buf)
return -ENOMEM;
- }
return 0;
}
devpriv->model = board->model;
+ sema_init(&devpriv->limit_sem, 8);
+
ret = vmk80xx_find_usb_endpoints(dev);
if (ret)
return ret;
if (ret)
return ret;
- sema_init(&devpriv->limit_sem, 8);
-
usb_set_intfdata(intf, devpriv);
if (devpriv->model == VMK8055_MODEL)
*last_block = current_block;
/* shift in advance in case of it followed by too many gaps */
- if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) {
+ if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
/* err should reassign to 0 after submitting */
err = 0;
goto submit_bio_out;
#define AD7192_CH_AIN3 BIT(6) /* AIN3 - AINCOM */
#define AD7192_CH_AIN4 BIT(7) /* AIN4 - AINCOM */
-#define AD7193_CH_AIN1P_AIN2M 0x000 /* AIN1(+) - AIN2(-) */
-#define AD7193_CH_AIN3P_AIN4M 0x001 /* AIN3(+) - AIN4(-) */
-#define AD7193_CH_AIN5P_AIN6M 0x002 /* AIN5(+) - AIN6(-) */
-#define AD7193_CH_AIN7P_AIN8M 0x004 /* AIN7(+) - AIN8(-) */
+#define AD7193_CH_AIN1P_AIN2M 0x001 /* AIN1(+) - AIN2(-) */
+#define AD7193_CH_AIN3P_AIN4M 0x002 /* AIN3(+) - AIN4(-) */
+#define AD7193_CH_AIN5P_AIN6M 0x004 /* AIN5(+) - AIN6(-) */
+#define AD7193_CH_AIN7P_AIN8M 0x008 /* AIN7(+) - AIN8(-) */
#define AD7193_CH_TEMP 0x100 /* Temp senseor */
#define AD7193_CH_AIN2P_AIN2M 0x200 /* AIN2(+) - AIN2(-) */
#define AD7193_CH_AIN1 0x401 /* AIN1 - AINCOM */
static IIO_DEV_ATTR_IPEAK(0644,
ade7854_read_32bit,
ade7854_write_32bit,
- ADE7854_VPEAK);
+ ADE7854_IPEAK);
static IIO_DEV_ATTR_APHCAL(0644,
ade7854_read_16bit,
ade7854_write_16bit,
INIT_LIST_HEAD(&iface->p->channel_list);
iface->p->dev_id = id;
- snprintf(iface->p->name, STRING_SIZE, "mdev%d", id);
+ strcpy(iface->p->name, iface->description);
iface->dev.init_name = iface->p->name;
iface->dev.bus = &mc.bus;
iface->dev.parent = &mc.dev;
#endif
return ret;
+#ifdef CONFIG_SERIAL_SC16IS7XX_SPI
err_spi:
+#endif
#ifdef CONFIG_SERIAL_SC16IS7XX_I2C
i2c_del_driver(&sc16is7xx_i2c_uart_driver);
-#endif
err_i2c:
+#endif
uart_unregister_driver(&sc16is7xx_uart);
return ret;
}
* center of the last stop bit in sampling clocks.
*/
int last_stop = bits * 2 - 1;
- int deviation = min_err * srr * last_stop / 2 / baud;
+ int deviation = DIV_ROUND_CLOSEST(min_err * last_stop *
+ (int)(srr + 1),
+ 2 * (int)baud);
if (abs(deviation) >= 2) {
/* At least two sampling clocks off at the
* last stop bit; we can increase the error
* margin by shifting the sampling point.
*/
- int shift = min(-8, max(7, deviation / 2));
+ int shift = clamp(deviation / 2, -8, 7);
hssrr |= (shift << HSCIF_SRHP_SHIFT) &
HSCIF_SRHP_MASK;
return;
}
scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
- update_region(vc, (unsigned long) start, count);
+ if (con_should_update(vc))
+ do_update_region(vc, (unsigned long) start, count);
vc->vc_need_wrap = 0;
}
rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
subvendor, subdevice, class, class_mask, 0);
if (rc)
- pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
+ pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
vendor, device, subvendor, subdevice,
class, class_mask, rc);
else
- pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
+ pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
vendor, device, subvendor, subdevice,
class, class_mask);
}
mutex_unlock(&container->lock);
}
-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
.name = "iommu-vfio-powerpc",
.owner = THIS_MODULE,
.open = tce_iommu_open,
MODULE_PARM_DESC(disable_hugepages,
"Disable VFIO IOMMU support for IOMMU hugepages.");
+static unsigned int dma_entry_limit __read_mostly = U16_MAX;
+module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
+MODULE_PARM_DESC(dma_entry_limit,
+ "Maximum number of user DMA mappings per container (65535).");
+
struct vfio_iommu {
struct list_head domain_list;
struct vfio_domain *external_domain; /* domain for external user */
struct mutex lock;
struct rb_root dma_list;
struct blocking_notifier_head notifier;
+ unsigned int dma_avail;
bool v2;
bool nesting;
};
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
kfree(dma);
+ iommu->dma_avail++;
}
static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
goto out_unlock;
}
+ if (!iommu->dma_avail) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
ret = -ENOMEM;
goto out_unlock;
}
+ iommu->dma_avail--;
dma->iova = iova;
dma->vaddr = vaddr;
dma->prot = prot;
INIT_LIST_HEAD(&iommu->domain_list);
iommu->dma_list = RB_ROOT;
+ iommu->dma_avail = dma_entry_limit;
mutex_init(&iommu->lock);
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
u64 start, u64 size, u64 end,
u64 userspace_addr, int perm)
{
- struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC);
+ struct vhost_umem_node *tmp, *node;
+ if (!size)
+ return -EFAULT;
+
+ node = kmalloc(sizeof(*node), GFP_ATOMIC);
if (!node)
return -ENOMEM;
for (i = 0; i < vp_dev->msix_used_vectors; ++i)
free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);
- for (i = 0; i < vp_dev->msix_vectors; i++)
- if (vp_dev->msix_affinity_masks[i])
- free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+ if (vp_dev->msix_affinity_masks) {
+ for (i = 0; i < vp_dev->msix_vectors; i++)
+ if (vp_dev->msix_affinity_masks[i])
+ free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+ }
if (vp_dev->msix_enabled) {
/* Disable the vector used for configuration */
GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
if (queue)
break;
+ if (!may_reduce_num)
+ return NULL;
}
if (!num)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
- GFP_KERNEL);
+ vma_priv = kzalloc(struct_size(vma_priv, pages, count), GFP_KERNEL);
if (!vma_priv)
return -ENOMEM;
if (xen_store_evtchn == 0)
return -ENOENT;
- nonseekable_open(inode, filp);
-
- filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */
+ stream_open(inode, filp);
u = kzalloc(sizeof(*u), GFP_KERNEL);
if (u == NULL)
*/
void afs_init_callback_state(struct afs_server *server)
{
- if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
- server->cb_s_break++;
+ server->cb_s_break++;
}
/*
static int afs_deliver_yfs_cb_callback(struct afs_call *);
#define CM_NAME(name) \
- const char afs_SRXCB##name##_name[] __tracepoint_string = \
+ char afs_SRXCB##name##_name[] __tracepoint_string = \
"CB." #name
/*
set_nlink(inode, 2);
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
- inode->i_ctime.tv_sec = get_seconds();
- inode->i_ctime.tv_nsec = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime;
+ inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
inode->i_blocks = 0;
inode_set_iversion_raw(inode, 0);
inode->i_generation = 0;
time64_t put_time; /* Time at which last put */
time64_t update_at; /* Time at which to next update the record */
unsigned long flags;
-#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */
#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */
#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */
#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */
static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
{
- return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+ return vnode->cb_break + vnode->cb_v_break;
}
static inline bool afs_cb_is_broken(unsigned int cb_break,
const struct afs_cb_interest *cbi)
{
return !cbi || cb_break != (vnode->cb_break +
- cbi->server->cb_s_break +
vnode->volume->cb_v_break);
}
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
- default:
abort_code = RXGEN_CC_UNMARSHAL;
if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret, "KUM");
goto local_abort;
+ default:
+ abort_code = RX_USER_ABORT;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KER");
+ goto local_abort;
}
}
bool stalled = false;
u64 rtt;
u32 life, last_life;
+ bool rxrpc_complete = false;
DECLARE_WAITQUEUE(myself, current);
rtt2 = 2;
timeout = rtt2;
- last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+ rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
add_wait_queue(&call->waitq, &myself);
for (;;) {
if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) {
+ /* rxrpc terminated the call. */
+ rxrpc_complete = true;
+ break;
+ }
+
if (timeout == 0 &&
life == last_life && signal_pending(current)) {
if (stalled)
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
- /* Kill off the call if it's still live. */
if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
- _debug("call interrupted");
- if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- RX_USER_ABORT, -EINTR, "KWI"))
- afs_set_call_complete(call, -EINTR, 0);
+ if (rxrpc_complete) {
+ afs_set_call_complete(call, call->error, call->abort_code);
+ } else {
+ /* Kill off the call if it's still live. */
+ _debug("call interrupted");
+ if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ RX_USER_ABORT, -EINTR, "KWI"))
+ afs_set_call_complete(call, -EINTR, 0);
+ }
}
spin_lock_bh(&call->state_lock);
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
- server->flags = (1UL << AFS_SERVER_FL_NEW);
server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
rwlock_init(&server->fs_lock);
INIT_HLIST_HEAD(&server->cb_volumes);
first = page->index + 1;
lock_page(page);
generic_error_remove_page(mapping, page);
+ unlock_page(page);
}
__pagevec_release(&pv);
return NULL;
if (unlikely(!get_reqs_available(ctx))) {
- kfree(req);
+ kmem_cache_free(kiocb_cachep, req);
return NULL;
}
*/
eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
if (IS_ERR(eventfd))
- return PTR_ERR(req->ki_eventfd);
+ return PTR_ERR(eventfd);
req->ki_eventfd = eventfd;
}
struct blkdev_dio *dio = bio->bi_private;
bool should_dirty = dio->should_dirty;
- if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_status && !dio->bio.bi_status)
- dio->bio.bi_status = bio->bi_status;
- } else {
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
+
+ if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ /*
+ * If the fs is mounted with nologreplay, which requires it to be
+ * mounted in RO mode as well, we can not allow discard on free space
+ * inside block groups, because log trees refer to extents that are not
+ * pinned in a block group's free space cache (pinning the extents is
+ * precisely the first phase of replaying a log tree).
+ */
+ if (btrfs_test_opt(fs_info, NOLOGREPLAY))
+ return -EROFS;
+
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
dev_list) {
static int prop_compression_validate(const char *value, size_t len)
{
- if (!strncmp("lzo", value, len))
+ if (!strncmp("lzo", value, 3))
return 0;
- else if (!strncmp("zlib", value, len))
+ else if (!strncmp("zlib", value, 4))
return 0;
- else if (!strncmp("zstd", value, len))
+ else if (!strncmp("zstd", value, 4))
return 0;
return -EINVAL;
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (!strncmp("zlib", value, 4)) {
type = BTRFS_COMPRESS_ZLIB;
- } else if (!strncmp("zstd", value, len)) {
+ } else if (!strncmp("zstd", value, 4)) {
type = BTRFS_COMPRESS_ZSTD;
btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
} else {
}
struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr);
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
#define CIFS_CACHE_READ_FLG 1
#endif /* CONFIG_CIFS_ACL */
void cifs_oplock_break(struct work_struct *work);
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
return cifs_file;
}
-/*
- * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server. Must be called without holding
- * tcon->open_file_lock and cifs_file->file_info_lock.
+/**
+ * cifsFileInfo_put - release a reference of file priv data
+ *
+ * Always potentially wait for oplock handler. See _cifsFileInfo_put().
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
+{
+ _cifsFileInfo_put(cifs_file, true);
+}
+
+/**
+ * _cifsFileInfo_put - release a reference of file priv data
+ *
+ * This may involve closing the filehandle @cifs_file out on the
+ * server. Must be called without holding tcon->open_file_lock and
+ * cifs_file->file_info_lock.
+ *
+ * If @wait_oplock_handler is true and we are releasing the last
+ * reference, wait for any running oplock break handler of the file
+ * and cancel any pending one. If calling this function from the
+ * oplock break handler, you need to pass false.
+ *
+ */
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
{
struct inode *inode = d_inode(cifs_file->dentry);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
spin_unlock(&tcon->open_file_lock);
- oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
+ oplock_break_cancelled = wait_oplock_handler ?
+ cancel_work_sync(&cifs_file->oplock_break) : false;
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+ _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
cifs_done_oplock_break(cinode);
}
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&pCifsInode->flags);
- queue_work(cifsoplockd_wq,
- &netfile->oplock_break);
+ cifs_queue_oplock_break(netfile);
netfile->oplock_break_cancelled = false;
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cinode->writers_lock);
}
+/**
+ * cifs_queue_oplock_break - queue the oplock break handler for cfile
+ *
+ * This function is called from the demultiplex thread when it
+ * receives an oplock break for @cfile.
+ *
+ * Assumes the tcon->open_file_lock is held.
+ * Assumes cfile->file_info_lock is NOT held.
+ */
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile)
+{
+ /*
+ * Bump the handle refcount now while we hold the
+ * open_file_lock to enforce the validity of it for the oplock
+ * break handler. The matching put is done at the end of the
+ * handler.
+ */
+ cifsFileInfo_get(cfile);
+
+ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+}
+
void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
{
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
- queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ cifs_queue_oplock_break(cfile);
kfree(lw);
return true;
}
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
spin_unlock(&cfile->file_info_lock);
- queue_work(cifsoplockd_wq,
- &cfile->oplock_break);
+
+ cifs_queue_oplock_break(cfile);
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov,
&resp_buftype);
+ if (!rc)
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (!rc || !err_iov.iov_base) {
rc = -ENOENT;
goto free_path;
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
ses->server->ops = &smb21_operations;
- } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ ses->server->vals = &smb21_values;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
ses->server->ops = &smb311_operations;
+ ses->server->vals = &smb311_values;
+ }
} else if (le16_to_cpu(rsp->DialectRevision) !=
ses->server->vals->protocol_id) {
/* if requested single dialect ensure returned dialect matched */
rqst.rq_nvec = 1;
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
-
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
if (rc) {
io_parms->tcon->tid, ses->Suid,
io_parms->offset, io_parms->length);
+ cifs_small_buf_release(req);
+
*nbytes = le32_to_cpu(rsp->DataLength);
if ((*nbytes > CIFS_MAX_MSGSIZE) ||
(*nbytes > io_parms->length)) {
rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst,
&resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
io_parms->offset, *nbytes);
}
+ cifs_small_buf_release(req);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
+#include <asm/pgalloc.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
+ pgtable_t pgtable = NULL;
struct page *zero_page;
spinlock_t *ptl;
pmd_t pmd_entry;
*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
DAX_PMD | DAX_ZERO_PAGE, false);
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
goto fallback;
}
+ if (pgtable) {
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ mm_inc_nr_ptes(vma->vm_mm);
+ }
pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
return VM_FAULT_NOPAGE;
fallback:
+ if (pgtable)
+ pte_free(vma->vm_mm, pgtable);
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
return VM_FAULT_FALLBACK;
}
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
ret = -EINVAL;
- if (rem < len) {
- pipe_unlock(pipe);
- goto out;
- }
+ if (rem < len)
+ goto out_free;
rem = len;
while (rem) {
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- pipe_buf_get(pipe, ibuf);
+ if (!pipe_buf_get(pipe, ibuf))
+ goto out_free;
+
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
ret = fuse_dev_do_write(fud, &cs, len);
pipe_lock(pipe);
+out_free:
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
pipe_unlock(pipe);
-out:
kvfree(bufs);
return ret;
}
umode_t mode, dev_t dev)
{
struct inode *inode;
- struct resv_map *resv_map;
+ struct resv_map *resv_map = NULL;
- resv_map = resv_map_alloc();
- if (!resv_map)
- return NULL;
+ /*
+ * Reserve maps are only needed for inodes that can have associated
+ * page allocations.
+ */
+ if (S_ISREG(mode) || S_ISLNK(mode)) {
+ resv_map = resv_map_alloc();
+ if (!resv_map)
+ return NULL;
+ }
inode = new_inode(sb);
if (inode) {
break;
}
lockdep_annotate_inode_mutex_key(inode);
- } else
- kref_put(&resv_map->refs, resv_map_release);
+ } else {
+ if (resv_map)
+ kref_put(&resv_map->refs, resv_map_release);
+ }
return inode;
}
tail = ctx->cached_cq_tail;
/* See comment at the top of the file */
smp_rmb();
- if (tail + 1 == READ_ONCE(ring->r.head))
+ if (tail - READ_ONCE(ring->r.head) == ring->ring_entries)
return NULL;
ctx->cached_cq_tail++;
list_add_tail(&req->list, &ctx->poll_list);
}
-static void io_file_put(struct io_submit_state *state, struct file *file)
+static void io_file_put(struct io_submit_state *state)
{
- if (!state) {
- fput(file);
- } else if (state->file) {
+ if (state->file) {
int diff = state->has_refs - state->used_refs;
if (diff)
state->ios_left--;
return state->file;
}
- io_file_put(state, NULL);
+ io_file_put(state);
}
state->file = fget_many(fd, state->ios_left);
if (!state->file)
static void io_submit_state_end(struct io_submit_state *state)
{
blk_finish_plug(&state->plug);
- io_file_put(state, NULL);
+ io_file_put(state);
if (state->free_reqs)
kmem_cache_free_bulk(req_cachep, state->free_reqs,
&state->reqs[state->cur_req]);
unuse_mm(cur_mm);
mmput(cur_mm);
}
+
+ if (kthread_should_park())
+ kthread_parkme();
+
return 0;
}
if (ctx->sqo_thread) {
ctx->sqo_stop = 1;
mb();
+ kthread_park(ctx->sqo_thread);
kthread_stop(ctx->sqo_thread);
ctx->sqo_thread = NULL;
}
fput(ctx->user_files[i]);
kfree(ctx->user_files);
+ ctx->user_files = NULL;
ctx->nr_user_files = 0;
return ret;
}
mmgrab(current->mm);
ctx->sqo_mm = current->mm;
- ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
- if (!ctx->sq_thread_idle)
- ctx->sq_thread_idle = HZ;
-
ret = -EINVAL;
if (!cpu_possible(p->sq_thread_cpu))
goto err;
if (ctx->flags & IORING_SETUP_SQPOLL) {
+ ret = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto err;
+
+ ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+ if (!ctx->sq_thread_idle)
+ ctx->sq_thread_idle = HZ;
+
if (p->flags & IORING_SETUP_SQ_AFF) {
int cpu;
cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS);
+ ret = -EINVAL;
+ if (!cpu_possible(p->sq_thread_cpu))
+ goto err;
+
ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
ctx, cpu,
"io_uring-sq");
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
+ __releases(ctx->uring_lock)
+ __acquires(ctx->uring_lock)
{
int ret;
percpu_ref_kill(&ctx->refs);
+
+ /*
+ * Drop uring mutex before waiting for references to exit. If another
+ * thread is currently inside io_uring_enter() it might need to grab
+ * the uring_lock to make progress. If we hold it here across the drain
+ * wait, then we can deadlock. It's safe to drop the mutex here, since
+ * no new references will come in after we've killed the percpu ref.
+ */
+ mutex_unlock(&ctx->uring_lock);
wait_for_completion(&ctx->ctx_done);
+ mutex_lock(&ctx->uring_lock);
switch (opcode) {
case IORING_REGISTER_BUFFERS:
};
ssize_t err, err2;
- if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
- return -EOPNOTSUPP;
-
src_lock = nfs_get_lock_context(nfs_file_open_context(src));
if (IS_ERR(src_lock))
return PTR_ERR(src_lock);
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
+ if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
+ return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
- return -EINVAL;
+ return -EOPNOTSUPP;
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
rpc_prepare_reply_pages(req, args->acl_pages, 0,
- args->acl_len, replen);
+ args->acl_len, replen + 1);
encode_nops(&hdr);
}
}
rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
- PAGE_SIZE, replen);
+ PAGE_SIZE, replen + 1);
encode_nops(&hdr);
}
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
args->nfs_server.port = ntohs(data->addr.sin_port);
- if (!nfs_verify_server_address(sap))
+ if (sap->sa_family != AF_INET ||
+ !nfs_verify_server_address(sap))
goto out_no_address;
if (!(data->flags & NFS_MOUNT_TCP))
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
__be32 nfserr;
- int count;
+ int count = 0;
+ struct page **p;
+ caddr_t page_addr = NULL;
dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - argp->buffer;
+ count = 0;
+ for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+ page_addr = page_address(*p);
+
+ if (((caddr_t)resp->buffer >= page_addr) &&
+ ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+ count += (caddr_t)resp->buffer - page_addr;
+ break;
+ }
+ count += PAGE_SIZE;
+ }
+ resp->count = count >> 2;
if (resp->offset) {
loff_t offset = argp->cookie;
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
+ int len;
u32 max_blocksize = svc_max_payload(rqstp);
p = decode_fh(p, &args->fh);
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
- args->count = min_t(u32, args->count, max_blocksize);
- args->buffer = page_address(*(rqstp->rq_next_page++));
+ len = args->count = min_t(u32, args->count, max_blocksize);
+
+ while (len > 0) {
+ struct page *p = *(rqstp->rq_next_page++);
+ if (!args->buffer)
+ args->buffer = page_address(p);
+ len -= PAGE_SIZE;
+ }
return xdr_argsize_check(rqstp, p);
}
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
+ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+ cb->cb_holds_slot = true;
}
rpc_call_start(task);
}
return true;
}
+ if (!cb->cb_holds_slot)
+ goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
cb->cb_seq_status);
}
+ cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+ cb->cb_holds_slot = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
+ locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+ locks_delete_block(&nbl->nbl_lock);
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
}
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
};
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+ bool cb_holds_slot;
};
struct nfsd4_callback_ops {
}
EXPORT_SYMBOL(nonseekable_open);
+
+/*
+ * stream_open is used by subsystems that want stream-like file descriptors.
+ * Such file descriptors are not seekable and don't have notion of position
+ * (file.f_pos is always 0). Contrary to file descriptors of other regular
+ * files, .read() and .write() can run simultaneously.
+ *
+ * stream_open never fails and is marked to return int so that it could be
+ * directly used as file_operations.open .
+ */
+int stream_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+ filp->f_mode |= FMODE_STREAM;
+ return 0;
+}
+
+EXPORT_SYMBOL(stream_open);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
- get_page(buf->page);
+ return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- long nr;
- unsigned long args[6], sp, pc;
+ struct syscall_info info;
+ u64 *args = &info.data.args[0];
int res;
res = lock_trace(task);
if (res)
return res;
- if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+ if (task_current_syscall(task, &info))
seq_puts(m, "running\n");
- else if (nr < 0)
- seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+ else if (info.data.nr < 0)
+ seq_printf(m, "%d 0x%llx 0x%llx\n",
+ info.data.nr, info.sp, info.data.instruction_pointer);
else
seq_printf(m,
- "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
- nr,
+ "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
+ info.data.nr,
args[0], args[1], args[2], args[3], args[4], args[5],
- sp, pc);
+ info.sp, info.data.instruction_pointer);
unlock_trace(task);
return 0;
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/bpf-cgroup.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
struct inode *inode = file_inode(filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+ void *new_buf = NULL;
ssize_t error;
- size_t res;
if (IS_ERR(head))
return PTR_ERR(head);
if (!table->proc_handler)
goto out;
+ error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
+ ppos, &new_buf);
+ if (error)
+ goto out;
+
/* careful: calling conventions are nasty here */
- res = count;
- error = table->proc_handler(table, write, buf, &res, ppos);
+ if (new_buf) {
+ mm_segment_t old_fs;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ error = table->proc_handler(table, write, (void __user *)new_buf,
+ &count, ppos);
+ set_fs(old_fs);
+ kfree(new_buf);
+ } else {
+ error = table->proc_handler(table, write, buf, &count, ppos);
+ }
+
if (!error)
- error = res;
+ error = count;
out:
sysctl_head_finish(head);
count = -EINTR;
goto out_mm;
}
+ /*
+ * Avoid modifying vma->vm_flags
+ * without locked ops while the
+ * coredump reads the vm_flags.
+ */
+ if (!mmget_still_valid(mm)) {
+ /*
+ * Silently return "count"
+ * as if get_task_mm() had
+ * failed. FIXME: should this
+ * function have returned
+ * -ESRCH if get_task_mm()
+ * failed, as it does when
+ * get_proc_task() fails?
+ */
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
static inline loff_t file_pos_read(struct file *file)
{
- return file->f_pos;
+ return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
}
static inline void file_pos_write(struct file *file, loff_t pos)
{
- file->f_pos = pos;
+ if ((file->f_mode & FMODE_STREAM) == 0)
+ file->f_pos = pos;
}
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
*obuf = *ibuf;
/*
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
+ /* no task can run (and in turn coredump) yet */
+ VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
* taking the mmap_sem for writing.
*/
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
+skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
wakeup:
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call. First argument is stored in
+ * @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args);
+ unsigned long *args);
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call.
+ * The first argument gets value @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args);
/**
* Drivers can use the @old_crtc_state input parameter if the operations
* needed to enable the CRTC don't depend solely on the new state but
* also on the transition between the old state and the new state.
+ *
+ * This function is optional.
*/
void (*atomic_enable)(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state);
* parameter @old_crtc_state which could be used to access the old
* state. Atomic drivers should consider to use this one instead
* of @disable.
+ *
+ * This function is optional.
*/
void (*atomic_disable)(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018-2019 SiFive, Inc.
+ * Wesley Terpstra
+ * Paul Walmsley
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H
+#define __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H
+
+/* Clock indexes for use by Device Tree data and the PRCI driver */
+
+#define PRCI_CLK_COREPLL 0
+#define PRCI_CLK_DDRPLL 1
+#define PRCI_CLK_GEMGXLPLL 2
+#define PRCI_CLK_TLCLK 3
+
+#endif
#define RESET_SD_EMMC_A 44
#define RESET_SD_EMMC_B 45
#define RESET_SD_EMMC_C 46
-/* 47-60 */
+/* 47 */
+#define RESET_USB_PHY20 48
+#define RESET_USB_PHY21 49
+/* 50-60 */
#define RESET_AUDIO_CODEC 61
/* 62-63 */
/* RESET2 */
int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
- unsigned char *h2, unsigned char h3, ...);
+ unsigned char *h2, unsigned int h3, ...);
int TSS_checkhmac1(unsigned char *buffer,
const uint32_t command,
const unsigned char *ononce,
return bio->bi_vcnt >= bio->bi_max_vecs;
}
-#define mp_bvec_for_each_segment(bv, bvl, i, iter_all) \
- for (bv = bvec_init_iter_all(&iter_all); \
- (iter_all.done < (bvl)->bv_len) && \
- (mp_bvec_next_segment((bvl), &iter_all), 1); \
- iter_all.done += bv->bv_len, i += 1)
+static inline bool bio_next_segment(const struct bio *bio,
+ struct bvec_iter_all *iter)
+{
+ if (iter->idx >= bio->bi_vcnt)
+ return false;
+
+ bvec_advance(&bio->bi_io_vec[iter->idx], iter);
+ return true;
+}
/*
* drivers should _never_ use the all version - the bio may have been split
* before it got to the driver and the driver won't own all of it
*/
-#define bio_for_each_segment_all(bvl, bio, i, iter_all) \
- for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++) \
- mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
+#define bio_for_each_segment_all(bvl, bio, i, iter) \
+ for (i = 0, bvl = bvec_init_iter_all(&iter); \
+ bio_next_segment((bio), &iter); i++)
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
unsigned bytes)
#define __constant_bitrev32(x) \
({ \
- u32 __x = x; \
- __x = (__x >> 16) | (__x << 16); \
- __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \
- __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \
- __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \
- __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \
- __x; \
+ u32 ___x = x; \
+ ___x = (___x >> 16) | (___x << 16); \
+ ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \
+ ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \
+ ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \
+ ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \
+ ___x; \
})
#define __constant_bitrev16(x) \
({ \
- u16 __x = x; \
- __x = (__x >> 8) | (__x << 8); \
- __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \
- __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \
- __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \
- __x; \
+ u16 ___x = x; \
+ ___x = (___x >> 8) | (___x << 8); \
+ ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \
+ ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \
+ ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \
+ ___x; \
})
#define __constant_bitrev8x4(x) \
({ \
- u32 __x = x; \
- __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \
- __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \
- __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \
- __x; \
+ u32 ___x = x; \
+ ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \
+ ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \
+ ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \
+ ___x; \
})
#define __constant_bitrev8(x) \
({ \
- u8 __x = x; \
- __x = (__x >> 4) | (__x << 4); \
- __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \
- __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \
- __x; \
+ u8 ___x = x; \
+ ___x = (___x >> 4) | (___x << 4); \
+ ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \
+ ___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1); \
+ ___x; \
})
#define bitrev32(x) \
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
bool blk_mq_complete_request(struct request *rq);
+void blk_mq_complete_request_sync(struct request *rq);
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio);
bool blk_mq_queue_stopped(struct request_queue *q);
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
struct percpu_ref q_usage_counter;
- struct list_head all_q_node;
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
+struct ctl_table;
+struct ctl_table_header;
#ifdef CONFIG_CGROUP_BPF
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
+ struct ctl_table *table, int write,
+ void __user *buf, size_t *pcount,
+ loff_t *ppos, void **new_buf,
+ enum bpf_attach_type type);
+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
\
__ret; \
})
+
+
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
+ buf, count, pos, nbuf, \
+ BPF_CGROUP_SYSCTL); \
+ __ret; \
+})
+
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
#define for_each_cgroup_storage_type(stype) for (; false; )
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
+
+ /* Direct value access helpers. */
+ int (*map_direct_value_addr)(const struct bpf_map *map,
+ u64 *imm, u32 off);
+ int (*map_direct_value_meta)(const struct bpf_map *map,
+ u64 imm, u32 *off);
};
struct bpf_map {
struct btf *btf;
u32 pages;
bool unpriv_array;
- /* 51 bytes hole */
+ bool frozen; /* write-once */
+ /* 48 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
+ ARG_PTR_TO_INT, /* pointer to int */
+ ARG_PTR_TO_LONG, /* pointer to long */
};
/* type of values returned from helper functions */
};
};
+#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32
+#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
+ BPF_F_RDONLY_PROG | \
+ BPF_F_WRONLY | \
+ BPF_F_WRONLY_PROG)
+
+#define BPF_MAP_CAN_READ BIT(0)
+#define BPF_MAP_CAN_WRITE BIT(1)
+
+static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
+{
+ u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
+
+ /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is
+ * not possible.
+ */
+ if (access_flags & BPF_F_RDONLY_PROG)
+ return BPF_MAP_CAN_READ;
+ else if (access_flags & BPF_F_WRONLY_PROG)
+ return BPF_MAP_CAN_WRITE;
+ else
+ return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE;
+}
+
+static inline bool bpf_map_flags_access_ok(u32 access_flags)
+{
+ return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
+ (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
+}
+
struct bpf_event_entry {
struct perf_event *event;
struct file *perf_file;
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
-int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
- const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
int array_map_alloc_check(union bpf_attr *attr);
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
{
return ERR_PTR(-EOPNOTSUPP);
}
+
+static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
extern const struct bpf_func_proto bpf_spin_lock_proto;
extern const struct bpf_func_proto bpf_spin_unlock_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
+extern const struct bpf_func_proto bpf_strtol_proto;
+extern const struct bpf_func_proto bpf_strtoul_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
struct bpf_verifier_state_list *next;
+ int miss_cnt, hit_cnt;
};
/* Possible states for alu_state member. */
unsigned long map_state; /* pointer/poison value for maps */
s32 call_imm; /* saved imm field of call insn */
u32 alu_limit; /* limit for add/sub register with pointer */
+ struct {
+ u32 map_index; /* index into used_maps[] */
+ u32 map_off; /* offset from value base address */
+ };
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
return log->len_used >= log->len_total - 1;
}
+#define BPF_LOG_LEVEL1 1
+#define BPF_LOG_LEVEL2 2
+#define BPF_LOG_STATS 4
+#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
+#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
+
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
return log->level && log->ubuf && !bpf_verifier_log_full(log);
bool strict_alignment; /* perform strict pointer alignment checks */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+ struct bpf_verifier_state_list *free_list;
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+ struct {
+ int *insn_state;
+ int *insn_stack;
+ int cur_stack;
+ } cfg;
u32 subprog_cnt;
+ /* number of instructions analyzed by the verifier */
+ u32 insn_processed;
+ /* total verification time */
+ u64 verification_time;
+ /* maximum number of verifier states kept in 'branching' instructions */
+ u32 max_states_per_insn;
+ /* total number of allocated verifier states */
+ u32 total_states;
+ /* Some states are freed during program analysis.
+ * This is the peak number of states. This number dominates kernel
+ * memory consumption during verification.
+ */
+ u32 peak_states;
+ /* longest register parentage chain walked for liveness marking */
+ u32 longest_mark_read_walk;
};
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
+bool btf_type_is_void(const struct btf_type *t);
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
{
- iter_all->bv.bv_page = NULL;
iter_all->done = 0;
+ iter_all->idx = 0;
return &iter_all->bv;
}
-static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
- struct bvec_iter_all *iter_all)
+static inline void bvec_advance(const struct bio_vec *bvec,
+ struct bvec_iter_all *iter_all)
{
struct bio_vec *bv = &iter_all->bv;
- if (bv->bv_page) {
+ if (iter_all->done) {
bv->bv_page = nth_page(bv->bv_page, 1);
bv->bv_offset = 0;
} else {
- bv->bv_page = bvec->bv_page;
- bv->bv_offset = bvec->bv_offset;
+ bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset /
+ PAGE_SIZE);
+ bv->bv_offset = bvec->bv_offset & ~PAGE_MASK;
}
bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
bvec->bv_len - iter_all->done);
+ iter_all->done += bv->bv_len;
+
+ if (iter_all->done == bvec->bv_len) {
+ iter_all->idx++;
+ iter_all->done = 0;
+ }
}
/*
struct screen_info *si, efi_guid_t *proto,
unsigned long size);
-bool efi_runtime_disabled(void);
+#ifdef CONFIG_EFI
+extern bool efi_runtime_disabled(void);
+#else
+static inline bool efi_runtime_disabled(void) { return true; }
+#endif
+
extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
extern unsigned long efi_call_virt_save_flags(void);
void (*exit_sched)(struct elevator_queue *);
int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
+ void (*depth_updated)(struct blk_mq_hw_ctx *);
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
u64_to_ether_addr(u, addr);
}
+/**
+ * eth_addr_inc() - Increment the given MAC address.
+ * @addr: Pointer to a six-byte array containing Ethernet address to increment.
+ */
+static inline void eth_addr_inc(u8 *addr)
+{
+ u64 u = ether_addr_to_u64(addr);
+
+ u++;
+ u64_to_ether_addr(u, addr);
+}
+
/**
* is_etherdev_addr - Tell if given Ethernet address belongs to the device.
* @dev: Pointer to a device structure
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;
+struct ctl_table;
+struct ctl_table_header;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
*/
};
+struct bpf_sysctl_kern {
+ struct ctl_table_header *head;
+ struct ctl_table *table;
+ void *cur_val;
+ size_t cur_len;
+ void *new_val;
+ size_t new_len;
+ int new_updated;
+ int write;
+ loff_t *ppos;
+ /* Temporary "register" for indirect stores to ppos. */
+ u64 tmp_reg;
+};
+
#endif /* __LINUX_FILTER_H__ */
#define FMODE_OPENED ((__force fmode_t)0x80000)
#define FMODE_CREATED ((__force fmode_t)0x100000)
+/* File is stream-like */
+#define FMODE_STREAM ((__force fmode_t)0x200000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
+extern int stream_open(struct inode * inode, struct file * filp);
#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-typedef int br_should_route_hook_t(struct sk_buff *skb);
-extern br_should_route_hook_t __rcu *br_should_route_hook;
-
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
}
extern u64 jiffies64_to_nsecs(u64 j);
+extern u64 jiffies64_to_msecs(u64 j);
extern unsigned long __msecs_to_jiffies(const unsigned int m);
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
#define u64_to_user_ptr(x) ( \
{ \
- typecheck(u64, x); \
- (void __user *)(uintptr_t)x; \
+ typecheck(u64, (x)); \
+ (void __user *)(uintptr_t)(x); \
} \
)
struct kretprobe *rp;
kprobe_opcode_t *ret_addr;
struct task_struct *task;
+ void *fp;
char data[0];
};
#include <linux/irqbypass.h>
#include <linux/swait.h>
#include <linux/refcount.h>
+#include <linux/nospec.h>
#include <asm/signal.h>
#include <linux/kvm.h>
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
- /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
- * the caller has read kvm->online_vcpus before (as is the case
- * for kvm_for_each_vcpu, for example).
- */
+ int num_vcpus = atomic_read(&kvm->online_vcpus);
+ i = array_index_nospec(i, num_vcpus);
+
+ /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
smp_rmb();
return kvm->vcpus[i];
}
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
+ as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
/* struct phy_device dev_flags definitions */
#define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001
#define MARVELL_PHY_M1118_DNS323_LEDS 0x00000002
+#define MARVELL_PHY_LED0_LINK_LED1_ACTIVE 0x00000004
#endif /* _MARVELL_PHY_H */
/* Bus address of the MDIO device (0-31) */
int addr;
int flags;
- struct gpio_desc *reset;
+ struct gpio_desc *reset_gpio;
+ struct reset_control *reset_ctrl;
unsigned int reset_assert_delay;
unsigned int reset_deassert_delay;
};
void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page_state().
+ */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
doorbell[1] = cpu_to_be32(cq->cqn);
- mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
+ mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL);
}
static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
enum {
MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
+ MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5,
};
enum {
#define MLX5_BF_OFFSET 0x800
#define MLX5_CQ_DOORBELL 0x20
-#if BITS_PER_LONG == 64
/* Assume that we can just write a 64-bit doorbell atomically. s390
* actually doesn't have writeq() but S/390 systems don't even have
* PCI so we won't worry about it.
+ *
+ * Note that the write is not atomic on 32-bit systems! In contrast to 64-bit
+ * ones, it requires proper locking. mlx5_write64 doesn't do any locking, so use
+ * it at your own discretion, protected by some kind of lock on 32 bits.
+ *
+ * TODO: use write{q,l}_relaxed()
*/
-#define MLX5_DECLARE_DOORBELL_LOCK(name)
-#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0)
-#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL)
-
-static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
+static inline void mlx5_write64(__be32 val[2], void __iomem *dest)
{
+#if BITS_PER_LONG == 64
__raw_writeq(*(u64 *)val, dest);
-}
-
#else
-
-/* Just fall back to a spinlock to protect the doorbell if
- * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
- * MMIO writes.
- */
-
-#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
-#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
-#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr)
-
-static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
-{
- unsigned long flags;
-
- if (doorbell_lock)
- spin_lock_irqsave(doorbell_lock, flags);
__raw_writel((__force u32) val[0], dest);
__raw_writel((__force u32) val[1], dest + 4);
- if (doorbell_lock)
- spin_unlock_irqrestore(doorbell_lock, flags);
-}
-
#endif
+}
#endif /* MLX5_DOORBELL_H */
MLX5_REG_MTRC_CONF = 0x9041,
MLX5_REG_MTRC_STDB = 0x9042,
MLX5_REG_MTRC_CTRL = 0x9043,
+ MLX5_REG_MPEIN = 0x9050,
MLX5_REG_MPCNT = 0x9051,
MLX5_REG_MTPPS = 0x9053,
MLX5_REG_MTPPSE = 0x9054,
u64 sys_image_guid;
phys_addr_t iseg_base;
struct mlx5_init_seg __iomem *iseg;
+ phys_addr_t bar_addr;
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
+void mlx5_health_flush(struct mlx5_core_dev *dev);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
MLX5_FLOW_NAMESPACE_EGRESS,
};
+enum {
+ FDB_BYPASS_PATH,
+ FDB_FAST_PATH,
+ FDB_SLOW_PATH,
+};
+
struct mlx5_flow_table;
struct mlx5_flow_group;
struct mlx5_flow_namespace;
union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
};
+struct mlx5_ifc_mpein_reg_bits {
+ u8 reserved_at_0[0x2];
+ u8 depth[0x6];
+ u8 pcie_index[0x8];
+ u8 node[0x8];
+ u8 reserved_at_18[0x8];
+
+ u8 capability_mask[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 link_width_enabled[0x8];
+ u8 link_speed_enabled[0x10];
+
+ u8 lane0_physical_position[0x8];
+ u8 link_width_active[0x8];
+ u8 link_speed_active[0x10];
+
+ u8 num_of_pfs[0x10];
+ u8 num_of_vfs[0x10];
+
+ u8 bdf0[0x10];
+ u8 reserved_at_b0[0x10];
+
+ u8 max_read_request_size[0x4];
+ u8 max_payload_size[0x4];
+ u8 reserved_at_c8[0x5];
+ u8 pwr_status[0x3];
+ u8 port_type[0x4];
+ u8 reserved_at_d4[0xb];
+ u8 lane_reversal[0x1];
+
+ u8 reserved_at_e0[0x14];
+ u8 pci_power[0xc];
+
+ u8 reserved_at_100[0x20];
+
+ u8 device_status[0x10];
+ u8 port_state[0x8];
+ u8 reserved_at_138[0x8];
+
+ u8 reserved_at_140[0x10];
+ u8 receiver_detect_result[0x10];
+
+ u8 reserved_at_160[0x20];
+};
+
struct mlx5_ifc_mpcnt_reg_bits {
u8 reserved_at_0[0x8];
u8 pcie_index[0x8];
};
struct mlx5_ifc_mcam_enhanced_features_bits {
- u8 reserved_at_0[0x74];
+ u8 reserved_at_0[0x6e];
+ u8 pci_status_and_power[0x1];
+ u8 reserved_at_6f[0x5];
u8 mark_tx_action_cnp[0x1];
u8 mark_tx_action_cqe[0x1];
u8 dynamic_tx_overflow[0x1];
struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
struct mlx5_ifc_ppad_reg_bits ppad_reg;
struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+ struct mlx5_ifc_mpein_reg_bits mpein_reg;
struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
struct mlx5_ifc_pplm_reg_bits pplm_reg;
struct mlx5_ifc_pplr_reg_bits pplr_reg;
struct mlx5_wqe_inline_seg {
__be32 byte_count;
+ __be32 data[0];
};
enum mlx5_sig_type {
}
#endif /* CONFIG_DEV_PAGEMAP_OPS */
+/* 127: arbitrary random number, small enough to assemble well */
+#define page_ref_zero_or_close_to_overflow(page) \
+ ((unsigned int) page_ref_count(page) + 127u <= 127u)
+
static inline void get_page(struct page *page)
{
page = compound_head(page);
* Getting a normal page or the head of a compound page
* requires to already have an elevated page->_refcount.
*/
- VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
+ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
+ page_ref_inc(page);
+}
+
+static inline __must_check bool try_get_page(struct page *page)
+{
+ page = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
+ return false;
page_ref_inc(page);
+ return true;
}
static inline void put_page(struct page *page)
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
-#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf)
+#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf)
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \
VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \
extern int sdio_register_driver(struct sdio_driver *);
extern void sdio_unregister_driver(struct sdio_driver *);
+/**
+ * module_sdio_driver() - Helper macro for registering a SDIO driver
+ * @__sdio_driver: sdio_driver struct
+ *
+ * Helper macro for SDIO drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit().
+ */
+#define module_sdio_driver(__sdio_driver) \
+ module_driver(__sdio_driver, sdio_register_driver, \
+ sdio_unregister_driver)
+
/*
* SDIO I/O operations
*/
#define SDIO_DEVICE_ID_MARVELL_8797_F0 0x9128
#define SDIO_DEVICE_ID_MARVELL_8887WLAN 0x9134
+#define SDIO_VENDOR_ID_MEDIATEK 0x037a
+
#define SDIO_VENDOR_ID_SIANO 0x039a
#define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201
#define SDIO_DEVICE_ID_SIANO_NICE 0x0202
int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
unsigned long arg);
#endif
+ int (*gettstamp) (struct socket *sock, void __user *userstamp,
+ bool timeval, bool time32);
int (*listen) (struct socket *sock, int len);
int (*shutdown) (struct socket *sock, int flags);
int (*setsockopt)(struct socket *sock, int level,
* @IFF_FAILOVER: device is a failover master device
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
+ * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
IFF_FAILOVER = 1<<27,
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
+ IFF_LIVE_RENAME_OK = 1<<30,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
+#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
/**
* struct net_device - The DEVICE structure.
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ const unsigned long *ul1 = (const unsigned long *)a1;
+ const unsigned long *ul2 = (const unsigned long *)a2;
+
+ return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL;
+#else
return a1->all[0] == a2->all[0] &&
a1->all[1] == a2->all[1] &&
a1->all[2] == a2->all[2] &&
a1->all[3] == a2->all[3];
+#endif
}
static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
static inline void
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
{
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
struct nf_nat_hook *nat_hook;
rcu_read_lock();
struct nf_osf_user_finger finger;
};
+struct nf_osf_data {
+ const char *genre;
+ const char *version;
+};
+
bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int hooknum, struct net_device *in, struct net_device *out,
const struct nf_osf_info *info, struct net *net,
const struct list_head *nf_osf_fingers);
-const char *nf_osf_find(const struct sk_buff *skb,
- const struct list_head *nf_osf_fingers,
- const int ttl_check);
+bool nf_osf_find(const struct sk_buff *skb,
+ const struct list_head *nf_osf_fingers,
+ const int ttl_check, struct nf_osf_data *data);
#endif /* _NFOSF_H */
int *error);
struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
-struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision);
struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
}
int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
+
+static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
+{
+#if IS_MODULE(CONFIG_IPV6)
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->route_me_harder(net, skb);
+#else
+ return ip6_route_me_harder(net, skb);
+#endif
+}
+
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
__le16 numdl;
__le16 numdu;
__u16 rsvd11;
- __le32 lpol;
- __le32 lpou;
+ union {
+ struct {
+ __le32 lpol;
+ __le32 lpou;
+ };
+ __le64 lpo;
+ };
__u32 rsvd14[2];
};
* is_c45: Set to true if this phy uses clause 45 addressing.
* is_internal: Set to true if this phy is internal to a MAC.
* is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
+ * is_gigabit_capable: Set to true if PHY supports 1000Mbps
* has_fixups: Set to true if this phy has fixups/quirks.
* suspended: Set to true if this phy has been suspended successfully.
* sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
unsigned is_c45:1;
unsigned is_internal:1;
unsigned is_pseudo_fixed_link:1;
+ unsigned is_gigabit_capable:1;
unsigned has_fixups:1;
unsigned suspended:1;
unsigned sysfs_links:1;
/*
* Get a reference to the pipe buffer.
*/
- void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
+ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};
/**
* pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
+ *
+ * Return: %true if the reference was successfully obtained.
*/
-static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- buf->ops->get(pipe, buf);
+ return buf->ops->get(pipe, buf);
}
/**
void free_pipe_info(struct pipe_inode_info *);
/* Generic pipe buffer ops functions */
-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
*
* @base: PMC clock register base offset
* @clks: pointer to set of registered clocks, typically 0..5
+ * @critical: flag to indicate if firmware enabled pmc_plt_clks
+ * should be marked as critical or not
*/
struct pmc_clk_data {
void __iomem *base;
const struct pmc_clk *clks;
+ bool critical;
};
#endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */
#include <linux/bug.h> /* For BUG_ON. */
#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
+#include <linux/seccomp.h>
+
+/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */
+struct syscall_info {
+ __u64 sp;
+ struct seccomp_data data;
+};
extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
-extern int task_current_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc);
+extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
#endif
* the least significant bit set but otherwise stores the address of
* the hash bucket. This allows us to be be sure we've found the end
* of the right list.
- * The value stored in the hash bucket has BIT(2) used as a lock bit.
+ * The value stored in the hash bucket has BIT(0) used as a lock bit.
* This bit must be atomically set before any changes are made to
* the chain. To avoid dereferencing this pointer without clearing
* the bit first, we use an opaque 'struct rhash_lock_head *' for the
* pointer stored in the bucket. This struct needs to be defined so
- * that rcu_derefernce() works on it, but it has no content so a
+ * that rcu_dereference() works on it, but it has no content so a
* cast is needed for it to be useful. This ensures it isn't
* used by mistake with clearing the lock bit first.
*/
struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
-/*
- * We lock a bucket by setting BIT(1) in the pointer - this is always
- * zero in real pointers and in the nulls marker.
- * bit_spin_locks do not handle contention well, but the whole point
- * of the hashtable design is to achieve minimum per-bucket contention.
- * A nested hash table might not have a bucket pointer. In that case
- * we cannot get a lock. For remove and replace the bucket cannot be
- * interesting and doesn't need locking.
- * For insert we allocate the bucket if this is the last bucket_table,
- * and then take the lock.
- * Sometimes we unlock a bucket by writing a new pointer there. In that
- * case we don't need to unlock, but we do need to reset state such as
- * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
- * provides the same release semantics that bit_spin_unlock() provides,
- * this is safe.
- */
-
-static inline void rht_lock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt)
-{
- local_bh_disable();
- bit_spin_lock(1, (unsigned long *)bkt);
- lock_map_acquire(&tbl->dep_map);
-}
-
-static inline void rht_lock_nested(struct bucket_table *tbl,
- struct rhash_lock_head **bucket,
- unsigned int subclass)
-{
- local_bh_disable();
- bit_spin_lock(1, (unsigned long *)bucket);
- lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
-}
-
-static inline void rht_unlock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt)
-{
- lock_map_release(&tbl->dep_map);
- bit_spin_unlock(1, (unsigned long *)bkt);
- local_bh_enable();
-}
-
-static inline void rht_assign_unlock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt,
- struct rhash_head *obj)
-{
- struct rhash_head **p = (struct rhash_head **)bkt;
-
- lock_map_release(&tbl->dep_map);
- rcu_assign_pointer(*p, obj);
- preempt_enable();
- __release(bitlock);
- local_bh_enable();
-}
-
-/*
- * If 'p' is a bucket head and might be locked:
- * rht_ptr() returns the address without the lock bit.
- * rht_ptr_locked() returns the address WITH the lock bit.
- */
-static inline struct rhash_head __rcu *rht_ptr(const struct rhash_lock_head *p)
-{
- return (void *)(((unsigned long)p) & ~BIT(1));
-}
-
-static inline struct rhash_lock_head __rcu *rht_ptr_locked(const
- struct rhash_head *p)
-{
- return (void *)(((unsigned long)p) | BIT(1));
-}
-
/*
* NULLS_MARKER() expects a hash value with the low
* bits mostly likely to be significant, and it discards
* the msb.
- * We git it an address, in which the bottom 2 bits are
+ * We give it an address, in which the bottom bit is
* always 0, and the msb might be significant.
* So we shift the address down one bit to align with
* expectations and avoid losing a significant bit.
+ *
+ * We never store the NULLS_MARKER in the hash table
+ * itself as we need the lsb for locking.
+ * Instead we store a NULL
*/
#define RHT_NULLS_MARKER(ptr) \
((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
#define INIT_RHT_NULLS_HEAD(ptr) \
- ((ptr) = RHT_NULLS_MARKER(&(ptr)))
+ ((ptr) = NULL)
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
&tbl->buckets[hash];
}
+/*
+ * We lock a bucket by setting BIT(0) in the pointer - this is always
+ * zero in real pointers. The NULLS mark is never stored in the bucket,
+ * rather we store NULL if the bucket is empty.
+ * bit_spin_locks do not handle contention well, but the whole point
+ * of the hashtable design is to achieve minimum per-bucket contention.
+ * A nested hash table might not have a bucket pointer. In that case
+ * we cannot get a lock. For remove and replace the bucket cannot be
+ * interesting and doesn't need locking.
+ * For insert we allocate the bucket if this is the last bucket_table,
+ * and then take the lock.
+ * Sometimes we unlock a bucket by writing a new pointer there. In that
+ * case we don't need to unlock, but we do need to reset state such as
+ * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
+ * provides the same release semantics that bit_spin_unlock() provides,
+ * this is safe.
+ * When we write to a bucket without unlocking, we use rht_assign_locked().
+ */
+
+static inline void rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head **bkt)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bkt);
+ lock_map_acquire(&tbl->dep_map);
+}
+
+static inline void rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head **bucket,
+ unsigned int subclass)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bucket);
+ lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
+}
+
+static inline void rht_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head **bkt)
+{
+ lock_map_release(&tbl->dep_map);
+ bit_spin_unlock(0, (unsigned long *)bkt);
+ local_bh_enable();
+}
+
+/*
+ * Where 'bkt' is a bucket and might be locked:
+ * rht_ptr() dereferences that pointer and clears the lock bit.
+ * rht_ptr_exclusive() dereferences in a context where exclusive
+ * access is guaranteed, such as when destroying the table.
+ */
+static inline struct rhash_head *rht_ptr(
+ struct rhash_lock_head __rcu * const *bkt,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ const struct rhash_lock_head *p =
+ rht_dereference_bucket_rcu(*bkt, tbl, hash);
+
+ if ((((unsigned long)p) & ~BIT(0)) == 0)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline struct rhash_head *rht_ptr_exclusive(
+ struct rhash_lock_head __rcu * const *bkt)
+{
+ const struct rhash_lock_head *p =
+ rcu_dereference_protected(*bkt, 1);
+
+ if (!p)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
+}
+
+static inline void rht_assign_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ lock_map_release(&tbl->dep_map);
+ rcu_assign_pointer(*p, obj);
+ preempt_enable();
+ __release(bitlock);
+ local_bh_enable();
+}
+
/**
* rht_for_each_from - iterate over hash chain from given head
* @pos: the &struct rhash_head to use as a loop cursor.
* @hash: the hash value / bucket index
*/
#define rht_for_each_from(pos, head, tbl, hash) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
- !rht_is_a_nulls(pos); \
+ for (pos = head; \
+ !rht_is_a_nulls(pos); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
/**
* @hash: the hash value / bucket index
*/
#define rht_for_each(pos, tbl, hash) \
- rht_for_each_from(pos, rht_ptr(*rht_bucket(tbl, hash)), tbl, hash)
+ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash)
/**
* rht_for_each_entry_from - iterate over hash chain from given head
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
+ for (pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_from(tpos, pos, rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash, member)
+ rht_for_each_entry_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash, member)
/**
* rht_for_each_entry_safe - safely iterate over hash chain of given type
* remove the loop cursor from the list.
*/
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
- for (pos = rht_dereference_bucket(rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash), \
+ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
*/
#define rht_for_each_rcu_from(pos, head, tbl, hash) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
* traversal is guarded by rcu_read_lock().
*/
#define rht_for_each_rcu(pos, tbl, hash) \
- for (({barrier(); }), \
- pos = rht_ptr(rht_dereference_bucket_rcu( \
- *rht_bucket(tbl, hash), tbl, hash)); \
- !rht_is_a_nulls(pos); \
+ for (({barrier(); }), \
+ pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \
+ !rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
/**
*/
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
*/
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
rht_for_each_entry_rcu_from(tpos, pos, \
- rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash, member)
+ rht_ptr(rht_bucket(tbl, hash), \
+ tbl, hash), \
+ tbl, hash, member)
/**
* rhl_for_each_rcu - iterate over rcu hash table list
hash = rht_key_hashfn(ht, tbl, key, params);
bkt = rht_bucket(tbl, hash);
do {
- he = rht_ptr(rht_dereference_bucket_rcu(*bkt, tbl, hash));
- rht_for_each_rcu_from(he, he, tbl, hash) {
+ rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
return rhashtable_insert_slow(ht, key, obj);
}
- rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *plist;
struct rhlist_head *list;
goto slow_path;
/* Inserting at head of list makes unlocking free. */
- head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
pprev = NULL;
rht_lock(tbl, bkt);
- rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
list = container_of(he, struct rhlist_head, rhead);
pprev = NULL;
rht_lock(tbl, bkt);
- rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (he != obj_old) {
pprev = &he->next;
continue;
__mmdrop(mm);
}
+/*
+ * This has to be called after a get_task_mm()/mmget_not_zero()
+ * followed by taking the mmap_sem for writing before modifying the
+ * vmas or anything the coredump pretends not to change from under it.
+ *
+ * NOTE: find_extend_vma() called from GUP context is the only place
+ * that can modify the "mm" (notably the vm_start/end) under mmap_sem
+ * for reading and outside the context of the process, so it is also
+ * the only case that holds the mmap_sem for reading that must call
+ * this function. Generally if the mmap_sem is held for reading
+ * there's no need of this check after get_task_mm()/mmget_not_zero().
+ *
+ * This function can be obsoleted and the check can be removed, after
+ * the coredump code will hold the mmap_sem for writing before
+ * invoking the ->core_dump methods.
+ */
+static inline bool mmget_still_valid(struct mm_struct *mm)
+{
+ return likely(!mm->core_state);
+}
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
struct list_head swaplist; /* chain of maybes on swap */
struct shared_policy policy; /* NUMA memory alloc policy */
struct simple_xattrs xattrs; /* list of xattrs */
+ atomic_t stop_eviction; /* hold when working on inode */
struct inode vfs_inode;
};
int node);
struct sk_buff *__build_skb(void *data, unsigned int frag_size);
struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size);
/**
* alloc_skb - allocate a network buffer
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize);
-#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
-#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb))
#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
#ifdef NET_SKBUFF_DATA_USES_OFFSET
unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err,
+ int *off, int *err,
struct sk_buff **last);
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err,
+ int *off, int *err,
struct sk_buff **last);
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err);
+ int *off, int *err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int *err);
__poll_t datagram_poll(struct file *file, struct socket *sock,
#ifndef __HAVE_ARCH_MEMCMP
extern int memcmp(const void *,const void *,__kernel_size_t);
#endif
+#ifndef __HAVE_ARCH_BCMP
+extern int bcmp(const void *,const void *,__kernel_size_t);
+#endif
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
}
#endif /* CONFIG_SUNRPC_SWAP */
-static inline bool
-rpc_task_need_resched(const struct rpc_task *task)
-{
- if (RPC_IS_QUEUED(task) || task->tk_callback)
- return true;
- return false;
-}
-
#endif /* _LINUX_SUNRPC_SCHED_H_ */
/*
* Creates a virtqueue and allocates the descriptor ring. If
* may_reduce_num is set, then this may allocate a smaller ring than
- * expected. The caller should query virtqueue_get_ring_size to learn
+ * expected. The caller should query virtqueue_get_vring_size to learn
* the actual size of the ring.
*/
struct virtqueue *vring_create_virtqueue(unsigned int index,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
+ u32 *);
void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
+bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
#endif /* _NET_RXRPC_H */
#define HCI_MAX_SHORT_NAME_LENGTH 10
+/* Min encryption key size to match with SMP */
+#define HCI_MIN_ENC_KEY_SIZE 7
+
/* Default LE RPA expiry time, 15 minutes */
#define HCI_DEFAULT_RPA_TIMEOUT (15 * 60)
#define wiphy_info(wiphy, format, args...) \
dev_info(&(wiphy)->dev, format, ##args)
+#define wiphy_err_ratelimited(wiphy, format, args...) \
+ dev_err_ratelimited(&(wiphy)->dev, format, ##args)
+#define wiphy_warn_ratelimited(wiphy, format, args...) \
+ dev_warn_ratelimited(&(wiphy)->dev, format, ##args)
+
#define wiphy_debug(wiphy, format, args...) \
wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
compat_int_t cmsg_type;
};
-int compat_sock_get_timestamp(struct sock *, struct timeval __user *);
-int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
-
#else /* defined(CONFIG_COMPAT) */
/*
* To avoid compiler warnings:
struct devlink_sb_pool_info *pool_info);
int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type);
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack);
int (*sb_port_pool_get)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int (*sb_port_pool_set)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold);
+ u32 threshold, struct netlink_ext_ack *extack);
int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port,
unsigned int sb_index,
u16 tc_index,
unsigned int sb_index,
u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold);
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack);
int (*sb_occ_snapshot)(struct devlink *devlink,
unsigned int sb_index);
int (*sb_occ_max_clear)(struct devlink *devlink,
#define __NET_FIB_NOTIFIER_H
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
+struct module;
+
struct fib_notifier_info {
struct net *net;
int family;
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
-#include <linux/module.h>
#include <uapi/linux/ife.h>
#if IS_ENABLED(CONFIG_NET_IFE)
struct fib6_nh {
struct fib_nh_common nh_common;
+
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ unsigned long last_probe;
+#endif
};
struct fib6_info {
struct list_head fib6_siblings;
unsigned int fib6_nsiblings;
- atomic_t fib6_ref;
+ refcount_t fib6_ref;
unsigned long expires;
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-#ifdef CONFIG_IPV6_ROUTER_PREF
- unsigned long last_probe;
-#endif
-
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
unsigned short rt6i_nfheader_len;
};
+struct fib6_result {
+ struct fib6_nh *nh;
+ struct fib6_info *f6i;
+ u32 fib6_flags;
+ u8 fib6_type;
+ struct rt6_info *rt6;
+};
+
#define for_each_fib6_node_rt_rcu(fn) \
for (rt = rcu_dereference((fn)->leaf); rt; \
rt = rcu_dereference(rt->fib6_next))
static inline void fib6_info_hold(struct fib6_info *f6i)
{
- atomic_inc(&f6i->fib6_ref);
+ refcount_inc(&f6i->fib6_ref);
}
static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
{
- return atomic_inc_not_zero(&f6i->fib6_ref);
+ return refcount_inc_not_zero(&f6i->fib6_ref);
}
static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
+ if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}
/* called with rcu lock held; can return error pointer
* caller needs to select path
*/
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags);
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags);
/* called with rcu lock held; caller needs to select path */
-struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int strict);
-
-struct fib6_info *fib6_multipath_select(const struct net *net,
- struct fib6_info *match,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb, int strict);
+int fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, struct fib6_result *res,
+ int strict);
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict);
struct fib6_node *fib6_node_lookup(struct fib6_node *root,
const struct in6_addr *daddr,
const struct in6_addr *saddr);
struct netlink_ext_ack *extack);
void fib6_nh_release(struct fib6_nh *fib6_nh);
-static inline
-struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
-{
- return f6i->fib6_nh.fib_nh_lws;
-}
-
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
- return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) &&
- f6i->fib6_nh.fib_nh_has_gw;
+ /* the RTF_ADDRCONF flag filters out RA's */
+ return !(f6i->fib6_flags & RTF_ADDRCONF) &&
+ f6i->fib6_nh.fib_nh_gw_family;
}
void ip6_route_input(struct sk_buff *skb);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
+void rt6_sync_up(struct net_device *dev, unsigned char nh_flags);
void rt6_disable_ip(struct net_device *dev, unsigned long event);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
void rt6_multipath_rebalance(struct fib6_info *f6i);
return mtu;
}
-u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr);
+u32 ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
u8 fc_protocol;
u8 fc_scope;
u8 fc_type;
- /* 3 bytes unused */
+ u8 fc_gw_family;
+ /* 2 bytes unused */
u32 fc_table;
__be32 fc_dst;
- __be32 fc_gw;
+ union {
+ __be32 fc_gw4;
+ struct in6_addr fc_gw6;
+ };
int fc_oif;
u32 fc_flags;
u32 fc_priority;
struct fib_nh_common {
struct net_device *nhc_dev;
int nhc_oif;
- unsigned int nhc_flags;
- struct lwtunnel_state *nhc_lwtstate;
unsigned char nhc_scope;
u8 nhc_family;
- u8 nhc_has_gw:1,
- unused:7;
+ u8 nhc_gw_family;
+ unsigned char nhc_flags;
+ struct lwtunnel_state *nhc_lwtstate;
+
union {
__be32 ipv4;
struct in6_addr ipv6;
#define fib_nh_flags nh_common.nhc_flags
#define fib_nh_lws nh_common.nhc_lwtstate
#define fib_nh_scope nh_common.nhc_scope
-#define fib_nh_family nh_common.nhc_family
-#define fib_nh_has_gw nh_common.nhc_has_gw
+#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw4 nh_common.nhc_gw.ipv4
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
#define fib_nh_weight nh_common.nhc_weight
#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
+ bool fib_nh_is_v6;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].fib_nh_dev
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+ struct netlink_ext_ack *extack);
__be32 fib_compute_spec_dst(struct sk_buff *skb);
bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
-int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
struct netlink_callback *cb);
int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
- unsigned int *flags, bool skip_oif);
+ unsigned char *flags, bool skip_oif);
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
int nh_weight);
#endif /* _NET_FIB_H */
/* Address family of addr */
u16 af;
+
+ u16 tun_type; /* tunnel type */
+ __be16 tun_port; /* tunnel port */
};
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
atomic_t last_weight; /* server latest weight */
+ __u16 tun_type; /* tunnel type */
+ __be16 tun_port; /* tunnel port */
refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
/* structs from net/ip6_fib.h */
struct fib6_info;
+struct fib6_nh;
+struct fib6_config;
+struct fib6_result;
/* This is ugly, ideally these symbols should be built
* into the core kernel.
int (*ipv6_route_input)(struct sk_buff *skb);
struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
- struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
- struct flowi6 *fl6, int flags);
- struct fib6_info *(*fib6_table_lookup)(struct net *net,
- struct fib6_table *table,
- int oif, struct flowi6 *fl6,
- int flags);
- struct fib6_info *(*fib6_multipath_select)(const struct net *net,
- struct fib6_info *f6i,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb,
- int strict);
- u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr);
+ int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags);
+ int (*fib6_table_lookup)(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags);
+ void (*fib6_select_path)(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool oif_match,
+ const struct sk_buff *skb, int strict);
+ u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
+ int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+ void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
unsigned int family, const void *cfg,
struct lwtunnel_state **lws,
struct netlink_ext_ack *extack);
-int lwtunnel_fill_encap(struct sk_buff *skb,
- struct lwtunnel_state *lwtstate);
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
+ int encap_attr, int encap_type_attr);
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
}
static inline int lwtunnel_fill_encap(struct sk_buff *skb,
- struct lwtunnel_state *lwtstate)
+ struct lwtunnel_state *lwtstate,
+ int encap_attr, int encap_type_attr)
{
return 0;
}
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to return packets from.
*
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
* Returns the next txq if successful, %NULL if no queue is eligible. If a txq
* is returned, it should be returned with ieee80211_return_txq() after the
* driver has finished scheduling it.
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
/**
- * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
- *
- * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @txq: pointer obtained from station or virtual interface
- *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
- */
-void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
-
-/**
- * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
+ * ieee80211_txq_schedule_start - start new scheduling round for TXQs
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to acquire locks for
*
- * Acquire locks needed to schedule TXQs from the given AC. Should be called
- * before ieee80211_next_txq() or ieee80211_return_txq().
+ * Should be called before ieee80211_next_txq() or ieee80211_return_txq().
+ * The driver must not call multiple TXQ scheduling rounds concurrently.
*/
-void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
- __acquires(txq_lock);
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
+
+/* (deprecated) */
+static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+{
+}
+
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq, bool force);
/**
- * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC
+ * ieee80211_schedule_txq - schedule a TXQ for transmission
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
- * @ac: AC number to acquire locks for
+ * @txq: pointer obtained from station or virtual interface
*
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Schedules a TXQ for transmission if it is not already scheduled,
+ * even if mac80211 does not have any packets buffered.
+ *
+ * The driver may call this function if it has buffered packets for
+ * this TXQ internally.
*/
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
- __releases(txq_lock);
+static inline void
+ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+{
+ __ieee80211_schedule_txq(hw, txq, true);
+}
/**
- * ieee80211_schedule_txq - schedule a TXQ for transmission
+ * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @txq: pointer obtained from station or virtual interface
+ * @force: schedule txq even if mac80211 does not have any buffered packets.
*
- * Schedules a TXQ for transmission if it is not already scheduled. Takes a
- * lock, which means it must *not* be called between
- * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end()
+ * The driver may set force=true if it has buffered packets for this TXQ
+ * internally.
*/
-void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock);
+static inline void
+ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
+ bool force)
+{
+ __ieee80211_schedule_txq(hw, txq, force);
+}
/**
* ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
#ifndef _NDISC_H
#define _NDISC_H
+#include <net/ipv6_stubs.h>
+
/*
* ICMP codes for neighbour discovery messages
*/
return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev);
}
+static inline
+struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev,
+ const void *pkey)
+{
+ return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
+ ndisc_hashfn, pkey, dev);
+}
+
static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey)
{
struct neighbour *n;
rcu_read_unlock_bh();
}
+static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
+ const void *pkey)
+{
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+ n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
+ if (n) {
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (n->confirmed != now)
+ n->confirmed = now;
+ }
+ rcu_read_unlock_bh();
+}
+
+/* uses ipv6_stub and is meant for use outside of IPv6 core */
+static inline struct neighbour *ip_neigh_gw6(struct net_device *dev,
+ const void *addr)
+{
+ struct neighbour *neigh;
+
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, addr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false);
+
+ return neigh;
+}
+
int ndisc_init(void);
int ndisc_late_init(void);
int (*pconstructor)(struct pneigh_entry *);
void (*pdestructor)(struct pneigh_entry *);
void (*proxy_redo)(struct sk_buff *skb);
+ bool (*allow_add)(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
char *id;
struct neigh_parms parms;
struct list_head parms_list;
return dev_queue_xmit(skb);
}
-static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
+static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
+ bool skip_cache)
{
const struct hh_cache *hh = &n->hh;
- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
+ if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
return neigh_hh_output(hh, skb);
else
return n->output(n, skb);
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NF_NAT_MASQUERADE_IPV4_H_
-#define _NF_NAT_MASQUERADE_IPV4_H_
-
-#include <net/netfilter/nf_nat.h>
-
-unsigned int
-nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
- const struct nf_nat_range2 *range,
- const struct net_device *out);
-
-int nf_nat_masquerade_ipv4_register_notifier(void);
-void nf_nat_masquerade_ipv4_unregister_notifier(void);
-
-#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NF_NAT_MASQUERADE_IPV6_H_
-#define _NF_NAT_MASQUERADE_IPV6_H_
-
-unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
- const struct net_device *out);
-int nf_nat_masquerade_ipv6_register_notifier(void);
-void nf_nat_masquerade_ipv6_unregister_notifier(void);
-
-#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
gfp_t flags);
void nf_ct_tmpl_free(struct nf_conn *tmpl);
+u32 nf_ct_get_id(const struct nf_conn *ct);
+
static inline void
nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
{
/* Expectation class */
unsigned int class;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
union nf_inet_addr saved_addr;
/* This is the original per-proto part, used to map the
* expected connection the way the recipient expects. */
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig);
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state,
+ u8 l4proto,
+ union nf_inet_addr *outer_daddr);
+
int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
#endif
}
-int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
+int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
const struct nf_hook_ops *nat_ops, unsigned int ops_count);
-void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
+void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
unsigned int ops_count);
unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops);
void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
+int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops);
+void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
+
unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NF_NAT_MASQUERADE_H_
+#define _NF_NAT_MASQUERADE_H_
+
+#include <net/netfilter/nf_nat.h>
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct nf_nat_range2 *range,
+ const struct net_device *out);
+
+int nf_nat_masquerade_inet_register_notifiers(void);
+void nf_nat_masquerade_inet_unregister_notifiers(void);
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ const struct net_device *out);
+
+#endif /*_NF_NAT_MASQUERADE_H_ */
return queue;
}
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
#endif /* _NF_QUEUE_H */
enum nft_trans_phase phase);
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding);
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_binding *binding, bool commit);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
/**
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
+void __init nft_chain_route_init(void);
+void nft_chain_route_fini(void);
#endif /* _NET_NF_TABLES_H */
#ifndef __NETNS_IPV6_H__
#define __NETNS_IPV6_H__
#include <net/dst_ops.h>
+#include <uapi/linux/icmpv6.h>
struct ctl_table_header;
int icmpv6_echo_ignore_all;
int icmpv6_echo_ignore_multicast;
int icmpv6_echo_ignore_anycast;
+ DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
+ unsigned long *icmpv6_ratemask_ptr;
int anycast_src_echo_reply;
int ip_nonlocal_bind;
int fwmark_reflect;
int nr_t1timer_running(struct sock *);
/* sysctl_net_netrom.c */
-void nr_register_sysctl(void);
+int nr_register_sysctl(void);
void nr_unregister_sysctl(void);
#endif
* According to specification 102 622 chapter 4.4 Pipes,
* the pipe identifier is 7 bits long.
*/
-#define NCI_HCI_MAX_PIPES 127
+#define NCI_HCI_MAX_PIPES 128
struct nci_hci_gate {
u8 gate;
#define __NET_PSAMPLE_H
#include <uapi/linux/psample.h>
-#include <linux/module.h>
#include <linux/list.h>
struct psample_group {
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
unsigned int rt_flags;
__u16 rt_type;
__u8 rt_is_input;
- __u8 rt_uses_gateway;
+ u8 rt_gw_family;
int rt_iif;
/* Info on neighbour */
- __be32 rt_gateway;
+ union {
+ __be32 rt_gw4;
+ struct in6_addr rt_gw6;
+ };
/* Miscellaneous cached information */
u32 rt_mtu_locked:1,
static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
{
- if (rt->rt_gateway)
- return rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ return rt->rt_gw4;
return daddr;
}
return hoplimit;
}
+static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+ __be32 daddr)
+{
+ struct neighbour *neigh;
+
+ neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
+
+ return neigh;
+}
+
+static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
+ struct sk_buff *skb,
+ bool *is_v6gw)
+{
+ struct net_device *dev = rt->dst.dev;
+ struct neighbour *neigh;
+
+ if (likely(rt->rt_gw_family == AF_INET)) {
+ neigh = ip_neigh_gw4(dev, rt->rt_gw4);
+ } else if (rt->rt_gw_family == AF_INET6) {
+ neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
+ *is_v6gw = true;
+ } else {
+ neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
+ }
+ return neigh;
+}
+
#endif /* _ROUTE_H */
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NET_NEXTHOP_H
-#define __NET_NEXTHOP_H
+#ifndef __NET_RTNH_H
+#define __NET_RTNH_H
#include <linux/rtnetlink.h>
#include <net/netlink.h>
struct qdisc_skb_head {
struct sk_buff *head;
struct sk_buff *tail;
- union {
- u32 qlen;
- atomic_t atomic_qlen;
- };
+ __u32 qlen;
spinlock_t lock;
};
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}
+static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+{
+ return q->flags & TCQ_F_CPUSTATS;
+}
+
static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
- if (qdisc->flags & TCQ_F_NOLOCK)
+ if (qdisc_is_percpu_stats(qdisc))
return qdisc->empty;
return !qdisc->q.qlen;
}
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+ return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
}
-static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
- u32 qlen = q->qstats.qlen;
+ __u32 qlen = q->qstats.qlen;
+ int i;
- if (q->flags & TCQ_F_NOLOCK)
- qlen += atomic_read(&q->q.atomic_qlen);
- else
+ if (qdisc_is_percpu_stats(q)) {
+ for_each_possible_cpu(i)
+ qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+ } else {
qlen += q->q.qlen;
+ }
return qlen;
}
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
const struct Qdisc *q = rcu_dereference(txq->qdisc);
- if (q->q.qlen) {
+ if (!qdisc_is_empty(q)) {
rcu_read_unlock();
return false;
}
return sch->enqueue(skb, sch, to_free);
}
-static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
-{
- return q->flags & TCQ_F_CPUSTATS;
-}
-
static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
__u64 bytes, __u32 packets)
{
this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}
-static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
- atomic_inc(&sch->q.atomic_qlen);
+ this_cpu_inc(sch->cpu_qstats->qlen);
}
-static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
- atomic_dec(&sch->q.atomic_qlen);
+ this_cpu_dec(sch->cpu_qstats->qlen);
}
static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
return skb;
}
+static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
+ struct sk_buff *skb)
+{
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_backlog_dec(sch, skb);
+ qdisc_bstats_cpu_update(sch, skb);
+ qdisc_qstats_cpu_qlen_dec(sch);
+ } else {
+ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+ }
+}
+
+static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
+ unsigned int pkt_len)
+{
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_qlen_inc(sch);
+ this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
+ } else {
+ sch->qstats.backlog += pkt_len;
+ sch->q.qlen++;
+ }
+}
+
/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
if (skb) {
skb = __skb_dequeue(&sch->gso_skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_backlog_dec(sch, skb);
+ qdisc_qstats_cpu_qlen_dec(sch);
+ } else {
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+ }
} else {
skb = sch->dequeue(sch);
}
/*
* This mimics the behavior of skb_set_owner_r
*/
- sk->sk_forward_alloc -= event->rmem_len;
+ sk_mem_charge(sk, event->rmem_len);
}
/* Tests if the list has one and only one entry. */
int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
/* Add a new event for propagation to the ULP. */
-int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev);
+int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sk_buff_head *skb_list);
/* Renege previously received chunks. */
void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
int sock_getsockopt(struct socket *sock, int level, int op,
char __user *optval, int __user *optlen);
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+ bool timeval, bool time32);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode);
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
* @p: poll_table
*
* See the comments in the wq_has_sleeper function.
- *
- * Do not derive sock from filp->private_data here. An SMC socket establishes
- * an internal TCP socket that is used in the fallback case. All socket
- * operations on the SMC socket are then forwarded to the TCP socket. In case of
- * poll, the filp->private_data pointer references the SMC socket because the
- * TCP socket has no file assigned.
*/
static inline void sock_poll_wait(struct file *filp, struct socket *sock,
poll_table *p)
}
void sock_enable_timestamp(struct sock *sk, int flag);
-int sock_get_timestamp(struct sock *, struct timeval __user *);
-int sock_get_timestampns(struct sock *, struct timespec __user *);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);
#include <net/act_api.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
-#include <linux/module.h>
+
+struct module;
struct tcf_ife_params {
u8 eth_dst[ETH_ALEN];
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
__be32 daddr, __wsum base)
{
- return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
+ return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
static inline bool tcp_checksum_complete(struct sk_buff *skb)
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_device_sk_destruct(struct sock *sk);
+void tls_device_free_resources_tx(struct sock *sk);
void tls_device_init(void);
void tls_device_cleanup(void);
int tls_tx_records(struct sock *sk, int flags);
int flags);
int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
int flags);
+bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
static inline struct tls_msg *tls_msg(struct sk_buff *skb)
{
static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
{
#ifdef CONFIG_SOCK_VALIDATE_XMIT
- return sk_fullsock(sk) &
+ return sk_fullsock(sk) &&
(smp_load_acquire(&sk->sk_validate_xmit_skb) ==
&tls_validate_xmit_skb);
#else
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *peeked, int *off, int *err);
+ int noblock, int *off, int *err);
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
int noblock, int *err)
{
- int peeked, off = 0;
+ int off = 0;
- return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
+ return __skb_recv_udp(sk, flags, noblock, &off, err);
}
int udp_v4_early_demux(struct sk_buff *skb);
int probe_order;
int remove_order;
- /* signal if the module handling the component cannot be removed */
- unsigned int ignore_module_refcount:1;
+ /*
+ * signal if the module handling the component should not be removed
+ * if a pcm is open. Setting this would prevent the module
+ * refcount being incremented in probe() but allow it to be incremented
+ * when a pcm is opened and decremented when it is closed.
+ */
+ unsigned int module_get_upon_open:1;
/* bits */
unsigned int idle_bias_on:1;
struct mutex mutex;
struct mutex dapm_mutex;
+ spinlock_t dpcm_lock;
+
bool instantiated;
bool topology_shortname_created;
__assign_str(name, dev ? dev->name : "-");
if (nhc) {
- if (nhc->nhc_family == AF_INET) {
+ if (nhc->nhc_gw_family == AF_INET) {
p32 = (__be32 *) __entry->gw4;
*p32 = nhc->nhc_gw.ipv4;
in6 = (struct in6_addr *)__entry->gw6;
*in6 = in6_zero;
- } else if (nhc->nhc_family == AF_INET6) {
+ } else if (nhc->nhc_gw_family == AF_INET6) {
p32 = (__be32 *) __entry->gw4;
*p32 = 0;
TRACE_EVENT(fib6_table_lookup,
- TP_PROTO(const struct net *net, const struct fib6_info *f6i,
+ TP_PROTO(const struct net *net, const struct fib6_result *res,
struct fib6_table *table, const struct flowi6 *flp),
- TP_ARGS(net, f6i, table, flp),
+ TP_ARGS(net, res, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
struct in6_addr *in6;
__entry->tb_id = table->tb6_id;
- __entry->err = ip6_rt_type_to_error(f6i->fib6_type);
+ __entry->err = ip6_rt_type_to_error(res->fib6_type);
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->tos = ip6_tclass(flp->flowlabel);
__entry->dport = 0;
}
- if (f6i->fib6_nh.fib_nh_dev) {
- __assign_str(name, f6i->fib6_nh.fib_nh_dev);
+ if (res->nh && res->nh->fib_nh_dev) {
+ __assign_str(name, res->nh->fib_nh_dev);
} else {
__assign_str(name, "-");
}
- if (f6i == net->ipv6.fib6_null_entry) {
+ if (res->f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
in6 = (struct in6_addr *)__entry->gw;
*in6 = in6_zero;
- } else if (f6i) {
+ } else if (res->nh) {
in6 = (struct in6_addr *)__entry->gw;
- *in6 = f6i->fib6_nh.fib_nh_gw6;
+ *in6 = res->nh->fib_nh_gw6;
}
),
TP_fast_assign(
__entry->id = id;
- syscall_get_arguments(current, regs, 0, 6, __entry->args);
+ syscall_get_arguments(current, regs, __entry->args);
),
TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)",
#define FIOGETOWN 0x8903
#define SIOCGPGRP 0x8904
#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* __ASM_GENERIC_SOCKIOS_H */
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
};
enum bpf_map_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_CGROUP_SYSCTL,
};
enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_SYSCTL,
__MAX_BPF_ATTACH_TYPE
};
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
#define BPF_OBJ_NAME_LEN 16U
-/* Flags for accessing BPF object */
+/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
+ *
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* error if an eBPF program tries to set a callback that is not
* supported in the current kernel.
*
- * The supported callback values that *argval* can combine are:
+ * *argval* is a flag array which can combine these flags:
*
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
*
+ * Therefore, this function can be used to clear a callback flag by
+ * setting the appropriate bit to zero. e.g. to disable the RTO
+ * callback:
+ *
+ * **bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
* Here are some examples of where one could call such eBPF
* program:
*
* Return
* 0 if iph and th are a valid SYN cookie ACK, or a negative error
* otherwise.
+ *
+ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * Description
+ * Get name of sysctl in /proc/sys/ and copy it into the buffer
+ * *buf* of size *buf_len* provided by the program.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get current value of sysctl as it is presented in /proc/sys
+ * (incl. newline, etc), and copy it as a string into the buffer
+ * *buf* of size *buf_len* provided by the program.
+ *
+ * The whole value is copied, no matter what file position user
+ * space issued e.g. sys_read at.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated value in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ *
+ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get new value being written by user space to sysctl (before
+ * the actual write happens) and copy it as a string into the
+ * buffer *buf* of size *buf_len* provided by the program.
+ *
+ * User space may write new value at file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated value in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * Description
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ * *buf* should contain a string in same form as provided by user
+ * space on sysctl write.
+ *
+ * User space may write new value at file position > 0. To override
+ * the whole sysctl value, file position should be set to zero.
+ * Return
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by isspace(3)) followed by a single optional '-'
+ * sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space strtol(3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than buf_len.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by isspace(3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space strtoul(3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than buf_len.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(skb_ecn_set_ce), \
FN(get_listener_sock), \
FN(skc_lookup_tcp), \
- FN(tcp_check_syncookie),
+ FN(tcp_check_syncookie), \
+ FN(sysctl_get_name), \
+ FN(sysctl_get_current_value), \
+ FN(sysctl_get_new_value), \
+ FN(sysctl_set_new_value), \
+ FN(strtol), \
+ FN(strtoul),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
+/* BPF_FUNC_sysctl_get_name flags. */
+#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
struct bpf_spin_lock {
__u32 val;
};
+
+struct bpf_sysctl {
+ __u32 write; /* Sysctl is being read (= 0) or written (= 1).
+ * Allows 1,2,4-byte read, but no write.
+ */
+ __u32 file_pos; /* Sysctl file position to read from, write to.
+ * Allows 1,2,4-byte read and 4-byte write.
+ */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
* struct, union and fwd
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- * FUNC and FUNC_PROTO.
+ * FUNC, FUNC_PROTO and VAR.
* "type" is a type_id referring to another type.
*/
union {
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
-#define BTF_KIND_MAX 13
-#define NR_BTF_KINDS 14
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
__u32 type;
};
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with its
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
static inline int ethtool_validate_speed(__u32 speed)
{
- return speed <= INT_MAX || speed == SPEED_UNKNOWN;
+ return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN;
}
/* Duplex, half or full. */
#define ICMPV6_TIME_EXCEED 3
#define ICMPV6_PARAMPROB 4
+#define ICMPV6_ERRMSG_MAX 127
+
#define ICMPV6_INFOMSG_MASK 0x80
#define ICMPV6_ECHO_REQUEST 128
#define ICMPV6_MRDISC_ADV 151
+#define ICMPV6_MSG_MAX 255
+
/*
* Codes for Destination Unreachable
*/
};
enum vlan_flags {
- VLAN_FLAG_REORDER_HDR = 0x1,
- VLAN_FLAG_GVRP = 0x2,
- VLAN_FLAG_LOOSE_BINDING = 0x4,
- VLAN_FLAG_MVRP = 0x8,
+ VLAN_FLAG_REORDER_HDR = 0x1,
+ VLAN_FLAG_GVRP = 0x2,
+ VLAN_FLAG_LOOSE_BINDING = 0x4,
+ VLAN_FLAG_MVRP = 0x8,
+ VLAN_FLAG_BRIDGE_BINDING = 0x10,
};
enum vlan_name_types {
#define KEY_TITLE 0x171
#define KEY_SUBTITLE 0x172
#define KEY_ANGLE 0x173
-#define KEY_ZOOM 0x174
+#define KEY_FULL_SCREEN 0x174 /* AC View Toggle */
+#define KEY_ZOOM KEY_FULL_SCREEN
#define KEY_MODE 0x175
#define KEY_KEYBOARD 0x176
-#define KEY_SCREEN 0x177
+#define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */
+#define KEY_SCREEN KEY_ASPECT_RATIO
#define KEY_PC 0x178 /* Media Select Computer */
#define KEY_TV 0x179 /* Media Select TV */
#define KEY_TV2 0x17a /* Media Select Cable */
#define IP_VS_PEDATA_MAXLEN 255
+/* Tunnel types */
+enum {
+ IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP */
+ IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE */
+ IP_VS_CONN_F_TUNNEL_TYPE_MAX,
+};
+
/*
* The struct ip_vs_service_user and struct ip_vs_dest_user are
* used to set IPVS rules through setsockopt.
IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */
+ IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */
+
+ IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */
+
__IPVS_DEST_ATTR_MAX,
};
*
* @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
* @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
+ * @NFTA_OSF_FLAGS: flags (NLA_U32)
*/
enum nft_osf_attributes {
NFTA_OSF_UNSPEC,
NFTA_OSF_DREG,
NFTA_OSF_TTL,
+ NFTA_OSF_FLAGS,
__NFTA_OSF_MAX,
};
#define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1)
+enum nft_osf_flags {
+ NFT_OSF_F_VERSION = (1 << 0),
+};
+
/**
* enum nft_device_attributes - nf_tables device netlink attributes
*
#ifndef _LINUX_SOCKIOS_H
#define _LINUX_SOCKIOS_H
+#include <asm/bitsperlong.h>
#include <asm/sockios.h>
/* Linux-specific socket ioctls */
#define SOCK_IOC_TYPE 0x89
+/*
+ * the timeval/timespec data structure layout is defined by libc,
+ * so we need to cover both possible versions on 32-bit.
+ */
+/* Get stamp (timeval) */
+#define SIOCGSTAMP_NEW _IOR(SOCK_IOC_TYPE, 0x06, long long[2])
+/* Get stamp (timespec) */
+#define SIOCGSTAMPNS_NEW _IOR(SOCK_IOC_TYPE, 0x07, long long[2])
+
+#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+/* on 64-bit and x32, avoid the ?: operator */
+#define SIOCGSTAMP SIOCGSTAMP_OLD
+#define SIOCGSTAMPNS SIOCGSTAMPNS_OLD
+#else
+#define SIOCGSTAMP ((sizeof(struct timeval)) == 8 ? \
+ SIOCGSTAMP_OLD : SIOCGSTAMP_NEW)
+#define SIOCGSTAMPNS ((sizeof(struct timespec)) == 8 ? \
+ SIOCGSTAMPNS_OLD : SIOCGSTAMPNS_NEW)
+#endif
+
/* Routing table calls. */
#define SIOCADDRT 0x890B /* add routing table entry */
#define SIOCDELRT 0x890C /* delete routing table entry */
#define TIPC_MCAST_REPLICAST 134 /* Default: TIPC selects. No arg */
#define TIPC_GROUP_JOIN 135 /* Takes struct tipc_group_req* */
#define TIPC_GROUP_LEAVE 136 /* No argument */
+#define TIPC_SOCK_RECVQ_USED 137 /* Default: none (read only) */
/*
* Flag values
#ifndef __KERNEL__
#include <stdlib.h>
+#include <time.h>
#endif
/*
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
+ /* parameters may set static keys */
+ jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
- jump_label_init();
-
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags) ||
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
return array->value + array->elem_size * (index & array->index_mask);
}
+/* Report the address of the value area of a single-entry array map.
+ *
+ * On success the address of array->value is stored in *imm and 0 is
+ * returned. Returns -ENOTSUPP unless the map has exactly one entry and
+ * -EINVAL when off is not smaller than the map's value size.
+ */
+static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
+				       u32 off)
+{
+	const bool single_entry = map->max_entries == 1;
+	struct bpf_array *array;
+
+	if (!single_entry)
+		return -ENOTSUPP;
+	if (map->value_size <= off)
+		return -EINVAL;
+
+	array = container_of(map, struct bpf_array, map);
+	*imm = (unsigned long)array->value;
+
+	return 0;
+}
+
+/* Translate an address inside a single-entry array map's value area
+ * back into an offset.
+ *
+ * Stores imm minus the start of array->value in *off and returns 0.
+ * Returns -ENOTSUPP unless the map has exactly one entry and -ENOENT
+ * when imm lies outside [value, value + elem_size).
+ */
+static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
+				       u32 *off)
+{
+	struct bpf_array *array;
+	u64 start, end;
+
+	if (map->max_entries != 1)
+		return -ENOTSUPP;
+
+	array = container_of(map, struct bpf_array, map);
+	start = (unsigned long)array->value;
+	end = start + array->elem_size;
+	if (imm < start || imm >= end)
+		return -ENOENT;
+
+	*off = imm - start;
+
+	return 0;
+}
+
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
return;
}
- seq_printf(m, "%u: ", *(u32 *)key);
+ if (map->btf_key_type_id)
+ seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
{
u32 int_data;
+ /* One exception for keyless BTF: .bss/.data/.rodata map */
+ if (btf_type_is_void(key_type)) {
+ if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+ map->max_entries != 1)
+ return -EINVAL;
+
+ if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
+ return -EINVAL;
+
+ return 0;
+ }
+
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
return -EINVAL;
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
+ .map_direct_value_addr = array_map_direct_value_addr,
+ .map_direct_value_meta = array_map_direct_value_meta,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return -EINVAL;
+ /* Program read-only/write-only not supported for special maps yet. */
+ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
+ return -EINVAL;
return array_map_alloc_check(attr);
}
i < btf_type_vlen(struct_type); \
i++, member++)
+/* Iterate over the btf_var_secinfo entries returned by
+ * btf_type_var_secinfo(struct_type): i runs from 0 up to (excluding)
+ * btf_type_vlen(struct_type) and member points at the current entry.
+ */
+#define for_each_vsi(i, struct_type, member) \
+ for (i = 0, member = btf_type_var_secinfo(struct_type); \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+/* Iterate over btf_var_secinfo entries starting at index "from": i runs
+ * from "from" up to (excluding) btf_type_vlen(struct_type) and member
+ * points at the current entry. "from" is parenthesized so that a
+ * compound expression is applied as a whole to both the index and the
+ * pointer offset.
+ */
+#define for_each_vsi_from(i, from, struct_type, member)			\
+	for (i = (from),						\
+	     member = btf_type_var_secinfo(struct_type) + (from);	\
+	     i < btf_type_vlen(struct_type);				\
+	     i++, member++)
+
static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
[BTF_KIND_RESTRICT] = "RESTRICT",
[BTF_KIND_FUNC] = "FUNC",
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
+ [BTF_KIND_VAR] = "VAR",
+ [BTF_KIND_DATASEC] = "DATASEC",
};
struct btf_kind_operations {
return false;
}
-static bool btf_type_is_void(const struct btf_type *t)
+bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void;
}
return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
}
+/* True when the kind encoded in t->info is BTF_KIND_VAR. */
+static bool btf_type_is_var(const struct btf_type *t)
+{
+	const u32 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_VAR;
+}
+
+/* True when the kind encoded in t->info is BTF_KIND_DATASEC. */
+static bool btf_type_is_datasec(const struct btf_type *t)
+{
+	const u32 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_DATASEC;
+}
+
+/* VAR and DATASEC types may only start a resolve chain: they are never
+ * valid as the sink or as an intermediate type when resolving.
+ */
+static bool btf_type_is_resolve_source_only(const struct btf_type *t)
+{
+	if (btf_type_is_var(t))
+		return true;
+
+	return btf_type_is_datasec(t);
+}
+
/* What types need to be resolved?
*
* btf_type_is_modifier() is an obvious one.
*
* btf_type_is_struct() because its member refers to
* another type (through member->type).
-
+ *
+ * btf_type_is_var() because the variable refers to
+ * another type. btf_type_is_datasec() holds multiple
+ * btf_type_is_var() types that need resolving.
+ *
* btf_type_is_array() because its element (array->type)
* refers to another type. Array can be thought of a
* special case of struct while array just has the same
static bool btf_type_needs_resolve(const struct btf_type *t)
{
return btf_type_is_modifier(t) ||
- btf_type_is_ptr(t) ||
- btf_type_is_struct(t) ||
- btf_type_is_array(t);
+ btf_type_is_ptr(t) ||
+ btf_type_is_struct(t) ||
+ btf_type_is_array(t) ||
+ btf_type_is_var(t) ||
+ btf_type_is_datasec(t);
}
/* t->size can be used */
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_DATASEC:
return true;
}
return (const struct btf_enum *)(t + 1);
}
+/* Return the memory directly following *t, viewed as a struct btf_var. */
+static const struct btf_var *btf_type_var(const struct btf_type *t)
+{
+ return (const struct btf_var *)(t + 1);
+}
+
+/* Return the memory directly following *t, viewed as the first element
+ * of a struct btf_var_secinfo array.
+ */
+static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
+{
+ return (const struct btf_var_secinfo *)(t + 1);
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
offset < btf->hdr.str_len;
}
-/* Only C-style identifier is permitted. This can be relaxed if
- * necessary.
- */
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+/* Decide whether c may appear in a BTF name.
+ *
+ * The first character must satisfy isalpha(), later characters
+ * isalnum(). '_' is always accepted, and '.' is accepted (in any
+ * position) only when dot_ok is set.
+ */
+static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
+{
+	bool alnum_ok = first ? isalpha(c) : isalnum(c);
+
+	if (alnum_ok || c == '_')
+		return true;
+
+	return c == '.' && dot_ok;
+}
+
+static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
/* offset must be valid */
const char *src = &btf->strings[offset];
const char *src_limit;
- if (!isalpha(*src) && *src != '_')
+ if (!__btf_name_char_ok(*src, true, dot_ok))
return false;
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
src++;
while (*src && src < src_limit) {
- if (!isalnum(*src) && *src != '_')
+ if (!__btf_name_char_ok(*src, false, dot_ok))
return false;
src++;
}
return !*src;
}
+/* Only C-style identifier is permitted. This can be relaxed if
+ * necessary.
+ */
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+{
+ return __btf_name_valid(btf, offset, false);
+}
+
+/* Validate the name at the given string offset with __btf_name_valid(),
+ * additionally permitting '.' characters (dot_ok = true).
+ */
+static bool btf_name_valid_section(const struct btf *btf, u32 offset)
+{
+ return __btf_name_valid(btf, offset, true);
+}
+
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!offset)
__btf_verifier_log(log, "\n");
}
+/* Log one btf_var_secinfo entry of a DATASEC type.
+ *
+ * Does nothing when verifier logging is not needed. Outside the
+ * CHECK_META phase the owning datasec type is logged first. The entry's
+ * type_id/offset/size are then printed, followed by the optional
+ * printf-style message (fmt may be NULL or empty) and a newline.
+ */
+__printf(4, 5)
+static void btf_verifier_log_vsi(struct btf_verifier_env *env,
+ const struct btf_type *datasec_type,
+ const struct btf_var_secinfo *vsi,
+ const char *fmt, ...)
+{
+ struct bpf_verifier_log *log = &env->log;
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+ if (env->phase != CHECK_META)
+ btf_verifier_log_type(env, datasec_type, NULL);
+
+ __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
+ vsi->type, vsi->offset, vsi->size);
+ if (fmt && *fmt) {
+ __btf_verifier_log(log, " ");
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+ }
+
+ __btf_verifier_log(log, "\n");
+}
+
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
u32 btf_data_size)
{
} else if (btf_type_is_ptr(size_type)) {
size = sizeof(void *);
} else {
- if (WARN_ON_ONCE(!btf_type_is_modifier(size_type)))
+ if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
+ !btf_type_is_var(size_type)))
return NULL;
size = btf->resolved_sizes[size_type_id];
u32 next_type_size = 0;
next_type = btf_type_by_id(btf, next_type_id);
- if (!next_type) {
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
return 0;
}
+/* Resolve step for a BTF_KIND_VAR type.
+ *
+ * Returns -EINVAL (after logging "Invalid type_id") when the referenced
+ * type does not exist, is itself a VAR/DATASEC, or has no size. Returns
+ * the result of env_stack_push() when a referenced type still has to be
+ * resolved first. Otherwise records the resolved id/size via
+ * env_stack_pop_resolved() and returns 0.
+ */
+static int btf_var_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *next_type;
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ u32 next_type_size;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ /* The referenced type is neither a sink nor resolved yet: visit it first. */
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ if (btf_type_is_modifier(next_type)) {
+ const struct btf_type *resolved_type;
+ u32 resolved_type_id;
+
+ resolved_type_id = next_type_id;
+ resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
+
+ /* A modifier that resolves to a still-unresolved pointer: visit that pointer. */
+ if (btf_type_is_ptr(resolved_type) &&
+ !env_type_is_resolve_sink(env, resolved_type) &&
+ !env_type_is_resolved(env, resolved_type_id))
+ return env_stack_push(env, resolved_type,
+ resolved_type_id);
+ }
+
+ /* We must resolve to something concrete at this point; forward
+ * types or anything else that would resolve to a size of zero
+ * are not allowed.
+ */
+ if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ env_stack_pop_resolved(env, next_type_id, next_type_size);
+
+ return 0;
+}
+
static int btf_ptr_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
struct btf *btf = env->btf;
next_type = btf_type_by_id(btf, next_type_id);
- if (!next_type) {
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
}
+/* Show a VAR by resolving type_id with btf_type_id_resolve() and
+ * delegating to the seq_show operation of the resolved type.
+ */
+static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ t = btf_type_id_resolve(btf, &type_id);
+
+ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+}
+
static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct seq_file *m)
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
- if (btf_type_nosize_or_null(index_type)) {
+ if (btf_type_is_resolve_source_only(index_type) ||
+ btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
- if (btf_type_nosize_or_null(elem_type)) {
+ if (btf_type_is_resolve_source_only(elem_type) ||
+ btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
"Invalid elem");
return -EINVAL;
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
- if (btf_type_nosize_or_null(member_type)) {
+ if (btf_type_is_resolve_source_only(member_type) ||
+ btf_type_nosize_or_null(member_type)) {
btf_verifier_log_member(env, v->t, member,
"Invalid member");
return -EINVAL;
.seq_show = btf_df_seq_show,
};
+/* Validate the metadata of a BTF_KIND_VAR type.
+ *
+ * Requires room for one struct btf_var after the type, vlen == 0, no
+ * kind_flag, a non-empty valid name (dots permitted), a non-void valid
+ * type id and a linkage of BTF_VAR_STATIC or BTF_VAR_GLOBAL_ALLOCATED.
+ * Returns sizeof(struct btf_var) on success, -EINVAL (after logging the
+ * reason) otherwise.
+ */
+static s32 btf_var_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_var *var;
+ u32 meta_needed = sizeof(*var);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ if (btf_type_kflag(t)) {
+ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+ return -EINVAL;
+ }
+
+ if (!t->name_off ||
+ !__btf_name_valid(env->btf, t->name_off, true)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ /* A var cannot be in type void */
+ if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
+ btf_verifier_log_type(env, t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ var = btf_type_var(t);
+ if (var->linkage != BTF_VAR_STATIC &&
+ var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+ btf_verifier_log_type(env, t, "Linkage not supported");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return meta_needed;
+}
+
+/* Log the referenced type id and the linkage of a VAR type. */
+static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
+{
+ const struct btf_var *var = btf_type_var(t);
+
+ btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
+}
+
+static const struct btf_kind_operations var_ops = {
+ .check_meta = btf_var_check_meta,
+ .resolve = btf_var_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_var_log,
+ .seq_show = btf_var_seq_show,
+};
+
+/* Validate the metadata of a BTF_KIND_DATASEC type.
+ *
+ * Requires room for vlen btf_var_secinfo entries after the type, a
+ * non-zero vlen and size, no kind_flag and a non-empty valid section
+ * name. Each entry must reference a non-void valid type id, start at or
+ * after the end of the previous entry, have a non-zero size and end
+ * within t->size. Returns the number of metadata bytes consumed on
+ * success, -EINVAL (after logging the reason) otherwise.
+ */
+static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
+				  const struct btf_type *t,
+				  u32 meta_left)
+{
+	const struct btf_var_secinfo *vsi;
+	u64 last_vsi_end_off = 0, sum = 0;
+	u32 i, meta_needed;
+
+	meta_needed = btf_type_vlen(t) * sizeof(*vsi);
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	if (!btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen == 0");
+		return -EINVAL;
+	}
+
+	if (!t->size) {
+		btf_verifier_log_type(env, t, "size == 0");
+		return -EINVAL;
+	}
+
+	if (btf_type_kflag(t)) {
+		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+		return -EINVAL;
+	}
+
+	if (!t->name_off ||
+	    !btf_name_valid_section(env->btf, t->name_off)) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	for_each_vsi(i, t, vsi) {
+		/* A var cannot be in type void */
+		if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
+			btf_verifier_log_vsi(env, t, vsi,
+					     "Invalid type_id");
+			return -EINVAL;
+		}
+
+		if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
+			btf_verifier_log_vsi(env, t, vsi,
+					     "Invalid offset");
+			return -EINVAL;
+		}
+
+		if (!vsi->size || vsi->size > t->size) {
+			btf_verifier_log_vsi(env, t, vsi,
+					     "Invalid size");
+			return -EINVAL;
+		}
+
+		/* Widen before adding: offset and size are both u32, so a
+		 * 32-bit sum could wrap and slip past the bound check below.
+		 */
+		last_vsi_end_off = (u64)vsi->offset + vsi->size;
+		if (last_vsi_end_off > t->size) {
+			btf_verifier_log_vsi(env, t, vsi,
+					     "Invalid offset+size");
+			return -EINVAL;
+		}
+
+		btf_verifier_log_vsi(env, t, vsi, NULL);
+		sum += vsi->size;
+	}
+
+	if (t->size < sum) {
+		btf_verifier_log_type(env, t, "Invalid btf_info size");
+		return -EINVAL;
+	}
+
+	return meta_needed;
+}
+
+/* Resolve step for a BTF_KIND_DATASEC type.
+ *
+ * Walks the section entries starting at v->next_member. Every entry must
+ * reference a VAR type; an unresolved VAR is pushed on the resolve stack
+ * (after recording the next entry to continue with) and its push result
+ * is returned. For resolved VARs the size of the VAR's type must not
+ * exceed the entry's size. Returns 0 once all entries passed, -EINVAL
+ * (after logging the reason) on a bad entry.
+ */
+static int btf_datasec_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_var_secinfo *vsi;
+ struct btf *btf = env->btf;
+ u16 i;
+
+ for_each_vsi_from(i, v->next_member, v->t, vsi) {
+ u32 var_type_id = vsi->type, type_id, type_size = 0;
+ const struct btf_type *var_type = btf_type_by_id(env->btf,
+ var_type_id);
+ if (!var_type || !btf_type_is_var(var_type)) {
+ btf_verifier_log_vsi(env, v->t, vsi,
+ "Not a VAR kind member");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, var_type) &&
+ !env_type_is_resolved(env, var_type_id)) {
+ /* Remember where to continue once the VAR has been resolved. */
+ env_stack_set_next_member(env, i + 1);
+ return env_stack_push(env, var_type, var_type_id);
+ }
+
+ type_id = var_type->type;
+ if (!btf_type_id_size(btf, &type_id, &type_size)) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
+ return -EINVAL;
+ }
+
+ if (vsi->size < type_size) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
+ return -EINVAL;
+ }
+ }
+
+ env_stack_pop_resolved(env, 0, 0);
+ return 0;
+}
+
+/* Log the size and the number of entries (vlen) of a DATASEC type. */
+static void btf_datasec_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+/* Print a DATASEC as: section ("<name>") = {<var>,<var>,...}
+ *
+ * Each entry is shown through the seq_show operation of the type it
+ * references, reading from data + the entry's in-section offset.
+ */
+static void btf_datasec_seq_show(const struct btf *btf,
+ const struct btf_type *t, u32 type_id,
+ void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *var;
+ u32 i;
+
+ seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off));
+ for_each_vsi(i, t, vsi) {
+ var = btf_type_by_id(btf, vsi->type);
+ /* Comma-separate every entry after the first. */
+ if (i)
+ seq_puts(m, ",");
+ btf_type_ops(var)->seq_show(btf, var, vsi->type,
+ data + vsi->offset, bits_offset, m);
+ }
+ seq_puts(m, "}");
+}
+
+static const struct btf_kind_operations datasec_ops = {
+ .check_meta = btf_datasec_check_meta,
+ .resolve = btf_datasec_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_datasec_log,
+ .seq_show = btf_datasec_seq_show,
+};
+
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
[BTF_KIND_RESTRICT] = &modifier_ops,
[BTF_KIND_FUNC] = &func_ops,
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
+ [BTF_KIND_VAR] = &var_ops,
+ [BTF_KIND_DATASEC] = &datasec_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
if (!env_type_is_resolved(env, type_id))
return false;
- if (btf_type_is_struct(t))
+ if (btf_type_is_struct(t) || btf_type_is_datasec(t))
return !btf->resolved_ids[type_id] &&
- !btf->resolved_sizes[type_id];
+ !btf->resolved_sizes[type_id];
- if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) {
+ if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
+ btf_type_is_var(t)) {
t = btf_type_id_resolve(btf, &type_id);
- return t && !btf_type_is_modifier(t);
+ return t &&
+ !btf_type_is_modifier(t) &&
+ !btf_type_is_var(t) &&
+ !btf_type_is_datasec(t);
}
if (btf_type_is_array(t)) {
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
+#include <linux/filter.h>
#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
-cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
}
}
+/* Helper lookup for cgroup device programs: forwards unchanged to
+ * cgroup_base_func_proto().
+ */
+static const struct bpf_func_proto *
+cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ return cgroup_base_func_proto(func_id, prog);
+}
+
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
.get_func_proto = cgroup_dev_func_proto,
.is_valid_access = cgroup_dev_is_valid_access,
};
+
+/**
+ * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
+ *
+ * @head: sysctl table header
+ * @table: sysctl table
+ * @write: sysctl is being read (= 0) or written (= 1)
+ * @buf: pointer to buffer passed by user space
+ * @pcount: value-result argument: value is size of buffer pointed to by @buf,
+ * result is size of @new_buf if program set new value, initial value
+ * otherwise
+ * @ppos: value-result argument: value is position at which read from or write
+ * to sysctl is happening, result is new position if program overrode it,
+ * initial value otherwise
+ * @new_buf: pointer to pointer to new buffer that will be allocated if program
+ * overrides new value provided by user space on sysctl write
+ * NOTE: it is the caller's responsibility to free *new_buf if it was set
+ * @type: type of program to be executed
+ *
+ * Program is run when sysctl is being accessed, either read or written, and
+ * can allow or deny such access.
+ *
+ * This function will return %-EPERM if an attached program is found and
+ * returned value != 1 during execution. In all other cases 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
+ struct ctl_table *table, int write,
+ void __user *buf, size_t *pcount,
+ loff_t *ppos, void **new_buf,
+ enum bpf_attach_type type)
+{
+ struct bpf_sysctl_kern ctx = {
+ .head = head,
+ .table = table,
+ .write = write,
+ .ppos = ppos,
+ .cur_val = NULL,
+ .cur_len = PAGE_SIZE,
+ .new_val = NULL,
+ .new_len = 0,
+ .new_updated = 0,
+ };
+ struct cgroup *cgrp;
+ int ret;
+
+ /* Snapshot the current sysctl value: read it through the table's own
+ * handler (write = 0, position 0) into a PAGE_SIZE kernel buffer.
+ */
+ ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
+ if (ctx.cur_val) {
+ mm_segment_t old_fs;
+ loff_t pos = 0;
+
+ /* The handler takes a __user pointer but is handed a kernel
+ * buffer, hence the temporary switch to KERNEL_DS.
+ */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
+ &ctx.cur_len, &pos)) {
+ /* Let BPF program decide how to proceed. */
+ ctx.cur_len = 0;
+ }
+ set_fs(old_fs);
+ } else {
+ /* Let BPF program decide how to proceed. */
+ ctx.cur_len = 0;
+ }
+
+ if (write && buf && *pcount) {
+ /* BPF program should be able to override new value with a
+ * buffer bigger than provided by user.
+ */
+ ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
+ ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
+ if (!ctx.new_val ||
+ copy_from_user(ctx.new_val, buf, ctx.new_len))
+ /* Let BPF program decide how to proceed. */
+ ctx.new_len = 0;
+ }
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(current);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+ rcu_read_unlock();
+
+ kfree(ctx.cur_val);
+
+ /* Hand the new-value buffer to the caller only when access was
+ * allowed and the program replaced the value; free it otherwise.
+ */
+ if (ret == 1 && ctx.new_updated) {
+ *new_buf = ctx.new_val;
+ *pcount = ctx.new_len;
+ } else {
+ kfree(ctx.new_val);
+ }
+
+ return ret == 1 ? 0 : -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
+
+/* Copy the path of a sysctl directory into a buffer.
+ *
+ * Recurses into dir->header.parent first, then appends dir's own
+ * procname followed by "/". *bufp is advanced and *lenp reduced by
+ * every successful copy. Returns the accumulated strscpy() counts, or
+ * the negative strscpy() result as soon as one copy fails.
+ */
+static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
+ size_t *lenp)
+{
+ ssize_t tmp_ret = 0, ret;
+
+ if (dir->header.parent) {
+ tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
+ if (tmp_ret < 0)
+ return tmp_ret;
+ }
+
+ ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
+ if (ret < 0)
+ return ret;
+ *bufp += ret;
+ *lenp -= ret;
+ ret += tmp_ret;
+
+ /* Avoid leading slash. */
+ if (!ret)
+ return ret;
+
+ tmp_ret = strscpy(*bufp, "/", *lenp);
+ if (tmp_ret < 0)
+ return tmp_ret;
+ *bufp += tmp_ret;
+ *lenp -= tmp_ret;
+
+ return ret + tmp_ret;
+}
+
+/* Helper: copy the sysctl name into buf.
+ *
+ * With BPF_F_SYSCTL_BASE_NAME only ctx->table->procname is copied;
+ * otherwise the directory path produced by sysctl_cpy_dir() is written
+ * first. Returns the sum of the copy results, -EINVAL for a NULL buf or
+ * a missing ctx->head, or the negative result of a failed copy.
+ */
+BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
+ size_t, buf_len, u64, flags)
+{
+ ssize_t tmp_ret = 0, ret;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
+ if (!ctx->head)
+ return -EINVAL;
+ tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
+ if (tmp_ret < 0)
+ return tmp_ret;
+ }
+
+ ret = strscpy(buf, ctx->table->procname, buf_len);
+
+ return ret < 0 ? ret : tmp_ret + ret;
+}
+
+static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
+ .func = bpf_sysctl_get_name,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+/* Copy a sysctl value into a caller buffer, always zero-terminating it.
+ *
+ * Returns src_len when the value fits with room for padding; the rest
+ * of dst is then zero-filled. Returns -EINVAL for a NULL dst, or (after
+ * zeroing dst) for a missing or empty source. Returns -E2BIG when
+ * dst_len is 0 or when dst_len <= src_len; in the latter case the value
+ * is truncated and the last byte of dst is set to '\0'.
+ */
+static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
+			     size_t src_len)
+{
+	if (!dst)
+		return -EINVAL;
+	if (!dst_len)
+		return -E2BIG;
+
+	if (!src || !src_len) {
+		memset(dst, 0, dst_len);
+		return -EINVAL;
+	}
+
+	if (src_len < dst_len) {
+		memcpy(dst, src, src_len);
+		memset(dst + src_len, '\0', dst_len - src_len);
+		return src_len;
+	}
+
+	memcpy(dst, src, dst_len);
+	dst[dst_len - 1] = '\0';
+
+	return -E2BIG;
+}
+
+/* Helper: copy ctx->cur_val (ctx->cur_len bytes) into buf via
+ * copy_sysctl_value() and return its result.
+ */
+BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
+ char *, buf, size_t, buf_len)
+{
+ return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
+}
+
+static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
+ .func = bpf_sysctl_get_current_value,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+/* Helper: copy ctx->new_val (ctx->new_len bytes) into buf via
+ * copy_sysctl_value(). When the sysctl is not being written, buf is
+ * zeroed (if present) and -EINVAL is returned.
+ */
+BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
+ size_t, buf_len)
+{
+ if (!ctx->write) {
+ if (buf && buf_len)
+ memset(buf, '\0', buf_len);
+ return -EINVAL;
+ }
+ return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
+}
+
+static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
+ .func = bpf_sysctl_get_new_value,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+/* Helper: replace the value being written with buf.
+ *
+ * Returns -EINVAL unless the sysctl is being written, a non-empty new
+ * value is present in ctx and buf/buf_len are non-zero; returns -E2BIG
+ * when buf_len exceeds PAGE_SIZE - 1. On success buf is copied into
+ * ctx->new_val, ctx->new_len is updated, ctx->new_updated is set and 0
+ * is returned.
+ */
+BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
+ const char *, buf, size_t, buf_len)
+{
+ if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
+ return -EINVAL;
+
+ if (buf_len > PAGE_SIZE - 1)
+ return -E2BIG;
+
+ memcpy(ctx->new_val, buf, buf_len);
+ ctx->new_len = buf_len;
+ ctx->new_updated = 1;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
+ .func = bpf_sysctl_set_new_value,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+/* Helper lookup for sysctl programs: returns the strtol/strtoul and
+ * sysctl-specific helper prototypes, and defers every other id to
+ * cgroup_base_func_proto().
+ */
+static const struct bpf_func_proto *
+sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_strtol:
+ return &bpf_strtol_proto;
+ case BPF_FUNC_strtoul:
+ return &bpf_strtoul_proto;
+ case BPF_FUNC_sysctl_get_name:
+ return &bpf_sysctl_get_name_proto;
+ case BPF_FUNC_sysctl_get_current_value:
+ return &bpf_sysctl_get_current_value_proto;
+ case BPF_FUNC_sysctl_get_new_value:
+ return &bpf_sysctl_get_new_value_proto;
+ case BPF_FUNC_sysctl_set_new_value:
+ return &bpf_sysctl_set_new_value_proto;
+ default:
+ return cgroup_base_func_proto(func_id, prog);
+ }
+}
+
+/* Decide whether a context access at (off, size) is permitted.
+ *
+ * The access must lie inside struct bpf_sysctl and be aligned to its
+ * own size. "write" may only be read; "file_pos" may be read, or
+ * written with a full 4-byte store. Permitted reads record a 4-byte
+ * field size and are accepted when bpf_ctx_narrow_access_ok() agrees.
+ */
+static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	const int size_default = sizeof(__u32);
+	const bool is_read = type == BPF_READ;
+
+	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
+		return false;
+
+	if (off == offsetof(struct bpf_sysctl, write)) {
+		if (!is_read)
+			return false;
+	} else if (off == offsetof(struct bpf_sysctl, file_pos)) {
+		if (!is_read)
+			return size == size_default;
+	} else {
+		return false;
+	}
+
+	/* Only reads of a known field get here. */
+	bpf_ctx_record_field_size(info, size_default);
+
+	return bpf_ctx_narrow_access_ok(off, size, size_default);
+}
+
+/* Rewrite an access to struct bpf_sysctl into instructions operating on
+ * struct bpf_sysctl_kern.
+ *
+ * "write" becomes a single load of the kernel struct's write field.
+ * "file_pos" is reached through the ppos pointer: a read loads the
+ * pointer and then the value behind it; a write saves a temporary
+ * register in tmp_reg, loads ppos into it, stores through it and
+ * restores the register. Returns the number of instructions written to
+ * insn_buf.
+ */
+static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sysctl, write):
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sysctl_kern, write,
+ FIELD_SIZEOF(struct bpf_sysctl_kern,
+ write),
+ target_size));
+ break;
+ case offsetof(struct bpf_sysctl, file_pos):
+ /* ppos is a pointer so it should be accessed via indirect
+ * loads and stores. Also for stores additional temporary
+ * register is used since neither src_reg nor dst_reg can be
+ * overridden.
+ */
+ if (type == BPF_WRITE) {
+ int treg = BPF_REG_9;
+
+ /* Step down from R9 until treg differs from both src_reg and dst_reg. */
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ *insn++ = BPF_STX_MEM(
+ BPF_DW, si->dst_reg, treg,
+ offsetof(struct bpf_sysctl_kern, tmp_reg));
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
+ treg, si->dst_reg,
+ offsetof(struct bpf_sysctl_kern, ppos));
+ *insn++ = BPF_STX_MEM(
+ BPF_SIZEOF(u32), treg, si->src_reg, 0);
+ *insn++ = BPF_LDX_MEM(
+ BPF_DW, treg, si->dst_reg,
+ offsetof(struct bpf_sysctl_kern, tmp_reg));
+ } else {
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sysctl_kern, ppos));
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
+ }
+ *target_size = sizeof(u32);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
+ .get_func_proto = sysctl_func_proto,
+ .is_valid_access = sysctl_is_valid_access,
+ .convert_ctx_access = sysctl_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_sysctl_prog_ops = {
+};
dst[i] = fp->insnsi[i];
if (!was_ld_map &&
dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
- dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
+ (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
+ dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
was_ld_map = true;
dst[i].imm = 0;
} else if (was_ld_map &&
u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
const u32 cnt_max = S16_MAX;
struct bpf_prog *prog_adj;
+ int err;
/* Since our patchlet doesn't expand the image, we're done. */
if (insn_delta == 0) {
* we afterwards may not fail anymore.
*/
if (insn_adj_cnt > cnt_max &&
- bpf_adj_branches(prog, off, off + 1, off + len, true))
- return NULL;
+ (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
+ return ERR_PTR(err);
/* Several new instructions need to be inserted. Make room
* for them. Likely, there's no need for a new allocation as
prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
GFP_USER);
if (!prog_adj)
- return NULL;
+ return ERR_PTR(-ENOMEM);
prog_adj->len = insn_adj_cnt;
continue;
tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
- if (!tmp) {
+ if (IS_ERR(tmp)) {
/* Patching may have repointed aux->prog during
* realloc from the original one, so we need to
* fix it up here on error.
*/
bpf_jit_prog_release_other(prog, clone);
- return ERR_PTR(-ENOMEM);
+ return tmp;
}
clone = tmp;
}
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
- struct xdp_frame *xdpf)
+ struct xdp_frame *xdpf,
+ struct sk_buff *skb)
{
unsigned int hard_start_headroom;
unsigned int frame_size;
void *pkt_data_start;
- struct sk_buff *skb;
/* Part of headroom was reserved to xdpf */
hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
pkt_data_start = xdpf->data - hard_start_headroom;
- skb = build_skb(pkt_data_start, frame_size);
- if (!skb)
+ skb = build_skb_around(skb, pkt_data_start, frame_size);
+ if (unlikely(!skb))
return NULL;
skb_reserve(skb, hard_start_headroom);
}
}
+#define CPUMAP_BATCH 8
+
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
- unsigned int processed = 0, drops = 0, sched = 0;
- struct xdp_frame *xdpf;
+ unsigned int drops = 0, sched = 0;
+ void *frames[CPUMAP_BATCH];
+ void *skbs[CPUMAP_BATCH];
+ gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+ int i, n, m;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
sched = cond_resched();
}
- /* Process packets in rcpu->queue */
- local_bh_disable();
/*
* The bpf_cpu_map_entry is single consumer, with this
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
- while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
- struct sk_buff *skb;
+ n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
+
+ for (i = 0; i < n; i++) {
+ void *f = frames[i];
+ struct page *page = virt_to_page(f);
+
+ /* Bring struct page memory area to curr CPU. Read by
+ * build_skb_around via page_is_pfmemalloc(), and when
+ * freed written by page_frag_free call.
+ */
+ prefetchw(page);
+ }
+
+ m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
+ if (unlikely(m == 0)) {
+ for (i = 0; i < n; i++)
+ skbs[i] = NULL; /* effect: xdp_return_frame */
+ drops = n;
+ }
+
+ local_bh_disable();
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct sk_buff *skb = skbs[i];
int ret;
- skb = cpu_map_build_skb(rcpu, xdpf);
+ skb = cpu_map_build_skb(rcpu, xdpf, skb);
if (!skb) {
xdp_return_frame(xdpf);
continue;
ret = netif_receive_skb_core(skb);
if (ret == NET_RX_DROP)
drops++;
-
- /* Limit BH-disable period */
- if (++processed == 8)
- break;
}
/* Feedback loop via tracepoint */
- trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+ trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
local_bh_enable(); /* resched point, may call do_softirq() */
}
* part of the ldimm64 insn is accessible.
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
- bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+ bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
+ insn->src_reg == BPF_PSEUDO_MAP_VALUE;
char tmp[64];
- if (map_ptr && !allow_ptr_leaks)
+ if (is_ptr && !allow_ptr_leaks)
imm = 0;
verbose(cbs->private_data, "(%02x) r%d = %s\n",
#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
- BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
+ BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
struct bucket {
struct hlist_nulls_head head;
/* Guard against local DoS, and discourage production use. */
return -EPERM;
- if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
- /* reserved bits should not be used */
+ if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
return -EINVAL;
if (!lru && percpu_lru)
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/ctype.h>
+
+#include "../../lib/kstrtox.h"
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
.arg2_type = ARG_ANYTHING,
};
#endif
+
+#define BPF_STRTOX_BASE_MASK 0x1F
+/* Parse an unsigned integer from the first buf_len bytes of buf.
+ *
+ * The bits of flags selected by BPF_STRTOX_BASE_MASK give the base, which
+ * must be 0, 8, 10 or 16; any other flag bit is rejected. Leading
+ * whitespace is skipped, and an optional '-' is only reported through
+ * is_negative, it is not applied to the value stored in res.
+ *
+ * Returns the number of characters consumed from buf on success, -EINVAL
+ * for bad arguments, when nothing remains after the skipped prefix or when
+ * _parse_integer() consumes nothing, and -ERANGE when _parse_integer()
+ * reports KSTRTOX_OVERFLOW.
+ */
+static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
+ unsigned long long *res, bool *is_negative)
+{
+ unsigned int base = flags & BPF_STRTOX_BASE_MASK;
+ const char *cur_buf = buf;
+ size_t cur_len = buf_len;
+ unsigned int consumed;
+ size_t val_len;
+ char str[64];
+
+ if (!buf || !buf_len || !res || !is_negative)
+ return -EINVAL;
+
+ if (base != 0 && base != 8 && base != 10 && base != 16)
+ return -EINVAL;
+
+ if (flags & ~BPF_STRTOX_BASE_MASK)
+ return -EINVAL;
+
+ while (cur_buf < buf + buf_len && isspace(*cur_buf))
+ ++cur_buf;
+
+ *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
+ if (*is_negative)
+ ++cur_buf;
+
+ consumed = cur_buf - buf;
+ cur_len -= consumed;
+ if (!cur_len)
+ return -EINVAL;
+ /* Parse from a bounded, NUL-terminated local copy: at most
+  * sizeof(str) - 1 bytes of the remaining input are considered.
+  */
+ cur_len = min(cur_len, sizeof(str) - 1);
+ memcpy(str, cur_buf, cur_len);
+ str[cur_len] = '\0';
+ cur_buf = str;
+
+ cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
+ val_len = _parse_integer(cur_buf, base, res);
+
+ if (val_len & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+
+ if (val_len == 0)
+ return -EINVAL;
+ /* Account for everything advanced over inside the local copy:
+  * whatever _parse_integer_fixup_radix() skipped plus val_len.
+  */
+ cur_buf += val_len;
+ consumed += cur_buf - str;
+
+ return consumed;
+}
+/* Signed counterpart of __bpf_strtoull().
+ *
+ * Parses the magnitude with __bpf_strtoull() and then applies the sign it
+ * reported. A negative input is rejected with -ERANGE when the negated
+ * magnitude, viewed as long long, comes out positive; a non-negative input
+ * is rejected with -ERANGE when the magnitude, viewed as long long, comes
+ * out negative. Otherwise the value is stored in res.
+ *
+ * Returns the consumed-character count from __bpf_strtoull() on success,
+ * or its negative error unchanged.
+ */
+static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
+ long long *res)
+{
+ unsigned long long _res;
+ bool is_negative;
+ int err;
+
+ err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
+ if (err < 0)
+ return err;
+ if (is_negative) {
+ if ((long long)-_res > 0)
+ return -ERANGE;
+ *res = -_res;
+ } else {
+ if ((long long)_res < 0)
+ return -ERANGE;
+ *res = _res;
+ }
+ return err;
+}
+/* BPF helper: parse a signed integer from buf into *res.
+ *
+ * Delegates to __bpf_strtoll() and additionally returns -ERANGE when the
+ * parsed long long does not survive a round trip through long. On success
+ * the value is written to *res and the count returned by __bpf_strtoll()
+ * is passed through; on failure *res is left untouched.
+ */
+BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
+ long *, res)
+{
+ long long _res;
+ int err;
+
+ err = __bpf_strtoll(buf, buf_len, flags, &_res);
+ if (err < 0)
+ return err;
+ if (_res != (long)_res)
+ return -ERANGE;
+ *res = _res;
+ return err;
+}
+/* Prototype for bpf_strtol: not GPL-only, integer return; arguments are a
+ * memory pointer, its constant size, an unconstrained flags value and a
+ * pointer to a long for the result.
+ */
+const struct bpf_func_proto bpf_strtol_proto = {
+ .func = bpf_strtol,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_LONG,
+};
+/* BPF helper: parse an unsigned integer from buf into *res.
+ *
+ * Delegates to __bpf_strtoull(). Input with a leading '-' is rejected with
+ * -EINVAL, and a value that does not survive a round trip through
+ * unsigned long is rejected with -ERANGE. On success the value is written
+ * to *res and the count returned by __bpf_strtoull() is passed through; on
+ * failure *res is left untouched.
+ */
+BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
+ unsigned long *, res)
+{
+ unsigned long long _res;
+ bool is_negative;
+ int err;
+
+ err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
+ if (err < 0)
+ return err;
+ if (is_negative)
+ return -EINVAL;
+ if (_res != (unsigned long)_res)
+ return -ERANGE;
+ *res = _res;
+ return err;
+}
+/* Prototype for bpf_strtoul: not GPL-only, integer return; arguments are a
+ * memory pointer, its constant size, an unconstrained flags value and a
+ * pointer to a long-sized slot for the result.
+ */
+const struct bpf_func_proto bpf_strtoul_proto = {
+ .func = bpf_strtoul,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_LONG,
+};
#endif
#ifdef CONFIG_CGROUP_BPF
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_cgroup_storage_map {
struct bpf_map map;
if (attr->value_size > PAGE_SIZE)
return ERR_PTR(-E2BIG);
- if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
- /* reserved bits should not be used */
+ if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
return ERR_PTR(-EINVAL);
if (attr->max_entries)
#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \
- BPF_F_RDONLY | BPF_F_WRONLY)
+ BPF_F_ACCESS_MASK)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
if (attr->max_entries == 0 ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags) ||
attr->key_size < LPM_KEY_SIZE_MIN ||
attr->key_size > LPM_KEY_SIZE_MAX ||
attr->value_size < LPM_VAL_SIZE_MIN ||
#include "percpu_freelist.h"
#define QUEUE_STACK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
-
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_queue_stack {
struct bpf_map map;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 0 ||
attr->value_size == 0 ||
- attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+ attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
return -EINVAL;
if (attr->value_size > KMALLOC_MAX_SIZE)
kvfree(area);
}
+static u32 bpf_map_flags_retain_permanent(u32 flags)
+{
+ /* Return flags with the creation flags removed that describe a
+  * map fd rather than the map object itself. Multiple file
+  * descriptors with different (access) properties can refer to
+  * the same map, so BPF_F_RDONLY and BPF_F_WRONLY have no meaning
+  * when the map object is inspected and are cleared here; every
+  * other bit is passed through unchanged.
+  */
+ return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
+}
+
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
map->map_type = attr->map_type;
map->key_size = attr->key_size;
map->value_size = attr->value_size;
map->max_entries = attr->max_entries;
- map->map_flags = attr->map_flags;
+ map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
map->numa_node = bpf_map_attr_numa_node(attr);
}
return 0;
}
+static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
+{
+ fmode_t mode = f.file->f_mode;
+
+ /* Start from the file mode of the fd. Global map permissions
+  * facing the syscall side may override it: once the map is
+  * marked frozen, FMODE_CAN_WRITE is dropped from the returned
+  * mode regardless of what the fd itself carries.
+  */
+ if (READ_ONCE(map->frozen))
+ mode &= ~FMODE_CAN_WRITE;
+ return mode;
+}
+
#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
"memlock:\t%llu\n"
- "map_id:\t%u\n",
+ "map_id:\t%u\n"
+ "frozen:\t%u\n",
map->map_type,
map->key_size,
map->value_size,
map->max_entries,
map->map_flags,
map->pages * 1ULL << PAGE_SHIFT,
- map->id);
+ map->id,
+ READ_ONCE(map->frozen));
if (owner_prog_type) {
seq_printf(m, "owner_prog_type:\t%u\n",
const char *end = src + BPF_OBJ_NAME_LEN;
memset(dst, 0, BPF_OBJ_NAME_LEN);
-
- /* Copy all isalnum() and '_' char */
+ /* Copy all isalnum(), '_' and '.' chars. */
while (src < end && *src) {
- if (!isalnum(*src) && *src != '_')
+ if (!isalnum(*src) &&
+ *src != '_' && *src != '.')
return -EINVAL;
*dst++ = *src++;
}
u32 key_size, value_size;
int ret = 0;
- key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
- if (!key_type || key_size != map->key_size)
- return -EINVAL;
+ /* Some maps allow key to be unspecified. */
+ if (btf_key_id) {
+ key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+ if (!key_type || key_size != map->key_size)
+ return -EINVAL;
+ } else {
+ key_type = btf_type_by_id(btf, 0);
+ if (!map->ops->map_check_btf)
+ return -EINVAL;
+ }
value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
if (!value_type || value_size != map->value_size)
map->spin_lock_off = btf_find_spin_lock(btf, value_type);
if (map_value_has_spin_lock(map)) {
+ if (map->map_flags & BPF_F_RDONLY_PROG)
+ return -EACCES;
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
- if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
+ if (!attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_READ)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_READ)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
return err;
}
+#define BPF_MAP_FREEZE_LAST_FIELD map_fd
+/* Handler for the BPF_MAP_FREEZE command: mark the map behind attr->map_fd
+ * as frozen.
+ *
+ * Returns 0 on success, -EINVAL when CHECK_ATTR() rejects attr, the error
+ * encoded by __bpf_map_get() when the fd does not yield a map, -EBUSY when
+ * the map is already frozen and -EPERM when the caller lacks
+ * CAP_SYS_ADMIN. The already-frozen check runs before the capability
+ * check.
+ *
+ * NOTE(review): the IS_ERR() path returns without fdput(); presumably
+ * __bpf_map_get() drops the fd reference itself on failure - confirm.
+ */
+static int map_freeze(const union bpf_attr *attr)
+{
+ int err = 0, ufd = attr->map_fd;
+ struct bpf_map *map;
+ struct fd f;
+
+ if (CHECK_ATTR(BPF_MAP_FREEZE))
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ if (READ_ONCE(map->frozen)) {
+ err = -EBUSY;
+ goto err_put;
+ }
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ WRITE_ONCE(map->frozen, true);
+err_put:
+ fdput(f);
+ return err;
+}
+
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
/* eBPF programs must be GPL compatible to use GPL-ed functions */
is_gpl = license_is_gpl_compatible(license);
- if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
+ if (attr->insn_cnt == 0 ||
+ attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
return -E2BIG;
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
case BPF_FLOW_DISSECTOR:
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
break;
+ case BPF_CGROUP_SYSCTL:
+ ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
+ break;
default:
return -EINVAL;
}
return lirc_prog_detach(attr);
case BPF_FLOW_DISSECTOR:
return skb_flow_dissector_bpf_prog_detach(attr);
+ case BPF_CGROUP_SYSCTL:
+ ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
+ break;
default:
return -EINVAL;
}
case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
+ case BPF_CGROUP_SYSCTL:
break;
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
return cgroup_bpf_prog_query(attr, uattr);
}
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;
+ if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
+ (!attr->test.ctx_size_in && attr->test.ctx_in))
+ return -EINVAL;
+
+ if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
+ (!attr->test.ctx_size_out && attr->test.ctx_out))
+ return -EINVAL;
+
prog = bpf_prog_get(attr->test.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
- unsigned long addr)
+ unsigned long addr, u32 *off,
+ u32 *type)
{
+ const struct bpf_map *map;
int i;
- for (i = 0; i < prog->aux->used_map_cnt; i++)
- if (prog->aux->used_maps[i] == (void *)addr)
- return prog->aux->used_maps[i];
+ for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
+ map = prog->aux->used_maps[i];
+ if (map == (void *)addr) {
+ *type = BPF_PSEUDO_MAP_FD;
+ return map;
+ }
+ if (!map->ops->map_direct_value_meta)
+ continue;
+ if (!map->ops->map_direct_value_meta(map, addr, off)) {
+ *type = BPF_PSEUDO_MAP_VALUE;
+ return map;
+ }
+ }
+
return NULL;
}
{
const struct bpf_map *map;
struct bpf_insn *insns;
+ u32 off, type;
u64 imm;
int i;
continue;
imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
- map = bpf_map_from_imm(prog, imm);
+ map = bpf_map_from_imm(prog, imm, &off, &type);
if (map) {
- insns[i].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[i].src_reg = type;
insns[i].imm = map->id;
- insns[i + 1].imm = 0;
+ insns[i + 1].imm = off;
continue;
}
}
case BPF_MAP_GET_NEXT_KEY:
err = map_get_next_key(&attr);
break;
+ case BPF_MAP_FREEZE:
+ err = map_freeze(&attr);
+ break;
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr, uattr);
break;
struct bpf_verifier_stack_elem *next;
};
-#define BPF_COMPLEXITY_LIMIT_INSNS 131072
#define BPF_COMPLEXITY_LIMIT_STACK 1024
#define BPF_COMPLEXITY_LIMIT_STATES 64
*/
subprog[env->subprog_cnt].start = insn_cnt;
- if (env->log.level > 1)
+ if (env->log.level & BPF_LOG_LEVEL2)
for (i = 0; i < env->subprog_cnt; i++)
verbose(env, "func#%d @%d\n", i, subprog[i].start);
struct bpf_reg_state *parent)
{
bool writes = parent == state->parent; /* Observe write marks */
+ int cnt = 0;
while (parent) {
/* if read wasn't screened by an earlier write ... */
parent->var_off.value, parent->off);
return -EFAULT;
}
+ if (parent->live & REG_LIVE_READ)
+ /* The parentage chain never changes and
+ * this parent was already marked as LIVE_READ.
+ * There is no need to keep walking the chain again and
+ * keep re-marking all parents as LIVE_READ.
+ * This case happens when the same register is read
+ * multiple times without writes into it in-between.
+ */
+ break;
/* ... then we depend on parent's value */
parent->live |= REG_LIVE_READ;
state = parent;
parent = state->parent;
writes = true;
+ cnt++;
}
+
+ if (env->longest_mark_read_walk < cnt)
+ env->longest_mark_read_walk = cnt;
return 0;
}
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *reg, *regs = state->regs;
if (regno >= MAX_BPF_REG) {
verbose(env, "R%d is invalid\n", regno);
return -EINVAL;
}
+ reg = &regs[regno];
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
- if (regs[regno].type == NOT_INIT) {
+ if (reg->type == NOT_INIT) {
verbose(env, "R%d !read_ok\n", regno);
return -EACCES;
}
/* We don't need to worry about FP liveness because it's read-only */
- if (regno != BPF_REG_FP)
- return mark_reg_read(env, &regs[regno],
- regs[regno].parent);
+ if (regno == BPF_REG_FP)
+ return 0;
+
+ return mark_reg_read(env, reg, reg->parent);
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
verbose(env, "frame pointer is read only\n");
return -EACCES;
}
- regs[regno].live |= REG_LIVE_WRITTEN;
+ reg->live |= REG_LIVE_WRITTEN;
if (t == DST_OP)
mark_reg_unknown(env, regs, regno);
}
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable stack access var_off=%s off=%d size=%d",
+ verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
tn_buf, off, size);
return -EACCES;
}
return 0;
}
+static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
+ int off, int size, enum bpf_access_type type)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_map *map = regs[regno].map_ptr;
+ u32 cap = bpf_map_flags_to_cap(map);
+ /* Check an access of the given type against the capability mask
+  * computed for the map held in register regno. Returns -EACCES
+  * (after logging) when the matching BPF_MAP_CAN_* bit is missing,
+  * 0 otherwise. off and size are only used in the log message.
+  */
+ if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
+ verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
+ map->value_size, off, size);
+ return -EACCES;
+ }
+
+ if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
+ verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
+ map->value_size, off, size);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size, bool zero_size_allowed)
* need to try adding each of min_value and max_value to off
* to make sure our theoretical access will be safe.
*/
- if (env->log.level)
+ if (env->log.level & BPF_LOG_LEVEL)
print_verifier_state(env, state);
/* The minimum value is only important with signed
verbose(env, "R%d leaks addr into map\n", value_regno);
return -EACCES;
}
-
+ err = check_map_access_type(env, regno, off, size, t);
+ if (err)
+ return err;
err = check_map_access(env, regno, off, size, false);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}
+static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
+ int off, int access_size,
+ bool zero_size_allowed)
+{
+ struct bpf_reg_state *reg = reg_state(env, regno);
+ /* Validate one candidate stack offset: off must be negative and not
+  * below -MAX_BPF_STACK, off + access_size must not exceed 0, and
+  * access_size must be non-negative (zero only if zero_size_allowed).
+  * On violation, log either the fixed offset (constant var_off) or
+  * the var_off string, and return -EACCES; otherwise return 0.
+  */
+ if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
+ access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
+ if (tnum_is_const(reg->var_off)) {
+ verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
+ regno, off, access_size);
+ } else {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
+ regno, tn_buf, access_size);
+ }
+ return -EACCES;
+ }
+ return 0;
+}
+
/* when register 'regno' is passed into function that will read 'access_size'
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized.
{
struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
- int off, i, slot, spi;
+ int err, min_off, max_off, i, slot, spi;
if (reg->type != PTR_TO_STACK) {
/* Allow zero-byte read from NULL, regardless of pointer type */
return -EACCES;
}
- /* Only allow fixed-offset stack reads */
- if (!tnum_is_const(reg->var_off)) {
- char tn_buf[48];
+ if (tnum_is_const(reg->var_off)) {
+ min_off = max_off = reg->var_off.value + reg->off;
+ err = __check_stack_boundary(env, regno, min_off, access_size,
+ zero_size_allowed);
+ if (err)
+ return err;
+ } else {
+ /* Variable offset is prohibited for unprivileged mode for
+ * simplicity since it requires corresponding support in
+ * Spectre masking for stack ALU.
+ * See also retrieve_ptr_limit().
+ */
+ if (!env->allow_ptr_leaks) {
+ char tn_buf[48];
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "invalid variable stack read R%d var_off=%s\n",
- regno, tn_buf);
- return -EACCES;
- }
- off = reg->off + reg->var_off.value;
- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
- access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
- verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
- regno, off, access_size);
- return -EACCES;
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
+ regno, tn_buf);
+ return -EACCES;
+ }
+ /* Only initialized buffer on stack is allowed to be accessed
+ * with variable offset. With uninitialized buffer it's hard to
+ * guarantee that whole memory is marked as initialized on
+ * helper return since specific bounds are unknown what may
+ * cause uninitialized stack leaking.
+ */
+ if (meta && meta->raw_mode)
+ meta = NULL;
+
+ if (reg->smax_value >= BPF_MAX_VAR_OFF ||
+ reg->smax_value <= -BPF_MAX_VAR_OFF) {
+ verbose(env, "R%d unbounded indirect variable offset stack access\n",
+ regno);
+ return -EACCES;
+ }
+ min_off = reg->smin_value + reg->off;
+ max_off = reg->smax_value + reg->off;
+ err = __check_stack_boundary(env, regno, min_off, access_size,
+ zero_size_allowed);
+ if (err) {
+ verbose(env, "R%d min value is outside of stack bound\n",
+ regno);
+ return err;
+ }
+ err = __check_stack_boundary(env, regno, max_off, access_size,
+ zero_size_allowed);
+ if (err) {
+ verbose(env, "R%d max value is outside of stack bound\n",
+ regno);
+ return err;
+ }
}
if (meta && meta->raw_mode) {
return 0;
}
- for (i = 0; i < access_size; i++) {
+ for (i = min_off; i < max_off + access_size; i++) {
u8 *stype;
- slot = -(off + i) - 1;
+ slot = -i - 1;
spi = slot / BPF_REG_SIZE;
if (state->allocated_stack <= slot)
goto err;
goto mark;
}
err:
- verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
- off, i, access_size);
+ if (tnum_is_const(reg->var_off)) {
+ verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
+ min_off, i - min_off, access_size);
+ } else {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
+ tn_buf, i - min_off, access_size);
+ }
return -EACCES;
mark:
/* reading any byte out of 8-byte 'spill_slot' will cause
mark_reg_read(env, &state->stack[spi].spilled_ptr,
state->stack[spi].spilled_ptr.parent);
}
- return update_stack_depth(env, state, off);
+ return update_stack_depth(env, state, min_off);
}
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_VALUE:
+ if (check_map_access_type(env, regno, reg->off, access_size,
+ meta && meta->raw_mode ? BPF_WRITE :
+ BPF_READ))
+ return -EACCES;
return check_map_access(env, regno, reg->off, access_size,
zero_size_allowed);
default: /* scalar_value|ptr_to_stack or invalid ptr */
type == ARG_CONST_SIZE_OR_ZERO;
}
+static bool arg_type_is_int_ptr(enum bpf_arg_type type)
+{
+ return type == ARG_PTR_TO_INT ||
+ type == ARG_PTR_TO_LONG; /* true only for these two argument types */
+}
+/* Map an integer-pointer argument type to the size of the pointed-to
+ * object: sizeof(u32) for ARG_PTR_TO_INT, sizeof(u64) for ARG_PTR_TO_LONG.
+ * Any other type yields -EINVAL.
+ */
+static int int_ptr_type_to_size(enum bpf_arg_type type)
+{
+ if (type == ARG_PTR_TO_INT)
+ return sizeof(u32);
+ else if (type == ARG_PTR_TO_LONG)
+ return sizeof(u64);
+
+ return -EINVAL;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
+ } else if (arg_type_is_int_ptr(arg_type)) {
+ expected_type = PTR_TO_STACK;
+ if (!type_is_pkt_pointer(type) &&
+ type != PTR_TO_MAP_VALUE &&
+ type != expected_type)
+ goto err_type;
} else {
verbose(env, "unsupported arg_type %d\n", arg_type);
return -EFAULT;
err = check_helper_mem_access(env, regno - 1,
reg->umax_value,
zero_size_allowed, meta);
+ } else if (arg_type_is_int_ptr(arg_type)) {
+ int size = int_ptr_type_to_size(arg_type);
+
+ err = check_helper_mem_access(env, regno, size, false, meta);
+ if (err)
+ return err;
+ err = check_ptr_alignment(env, reg, 0, size, true);
}
return err;
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
print_verifier_state(env, caller);
verbose(env, "callee:\n");
return err;
*insn_idx = callee->callsite + 1;
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "returning from callee:\n");
print_verifier_state(env, callee);
verbose(env, "to caller at %d:\n", *insn_idx);
int func_id, int insn_idx)
{
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_map *map = meta->map_ptr;
if (func_id != BPF_FUNC_tail_call &&
func_id != BPF_FUNC_map_lookup_elem &&
func_id != BPF_FUNC_map_peek_elem)
return 0;
- if (meta->map_ptr == NULL) {
+ if (map == NULL) {
verbose(env, "kernel subsystem misconfigured verifier\n");
return -EINVAL;
}
+ /* In case of read-only, some additional restrictions
+ * need to be applied in order to prevent altering the
+ * state of the map from program side.
+ */
+ if ((map->map_flags & BPF_F_RDONLY_PROG) &&
+ (func_id == BPF_FUNC_map_delete_elem ||
+ func_id == BPF_FUNC_map_update_elem ||
+ func_id == BPF_FUNC_map_push_elem ||
+ func_id == BPF_FUNC_map_pop_elem)) {
+ verbose(env, "write into map forbidden\n");
+ return -EACCES;
+ }
+
if (!BPF_MAP_PTR(aux->map_state))
bpf_map_ptr_store(aux, meta->map_ptr,
meta->map_ptr->unpriv_array);
switch (ptr_reg->type) {
case PTR_TO_STACK:
+ /* Indirect variable offset stack access is prohibited in
+ * unprivileged mode so it's not handled here.
+ */
off = ptr_reg->off + ptr_reg->var_off.value;
if (mask_to_left)
*ptr_limit = MAX_BPF_STACK + off;
insn->dst_reg);
return -EACCES;
}
- if (env->log.level)
+ if (env->log.level & BPF_LOG_LEVEL)
print_verifier_state(env, this_branch->frame[this_branch->curframe]);
return 0;
}
-/* return the map pointer stored inside BPF_LD_IMM64 instruction */
-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
-{
- u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
-
- return (struct bpf_map *) (unsigned long) imm64;
-}
-
/* verify BPF_LD_IMM64 instruction */
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
+ struct bpf_insn_aux_data *aux = cur_aux(env);
struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_map *map;
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
return 0;
}
- /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
- BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
+ map = env->used_maps[aux->map_index];
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+ regs[insn->dst_reg].map_ptr = map;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
+ regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+ regs[insn->dst_reg].off = aux->map_off;
+ if (map_value_has_spin_lock(map))
+ regs[insn->dst_reg].id = ++env->id_gen;
+ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+ regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+ } else {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
- regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
- regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
return 0;
}
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
break;
default:
return 0;
#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
-static int *insn_stack; /* stack of insns to process */
-static int cur_stack; /* current stack index */
-static int *insn_state;
-
/* t, w, e - match pseudo-code above:
* t - index of current instruction
* w - next instruction
*/
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
+ int *insn_stack = env->cfg.insn_stack;
+ int *insn_state = env->cfg.insn_state;
+
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
return 0;
/* tree-edge */
insn_state[t] = DISCOVERED | e;
insn_state[w] = DISCOVERED;
- if (cur_stack >= env->prog->len)
+ if (env->cfg.cur_stack >= env->prog->len)
return -E2BIG;
- insn_stack[cur_stack++] = w;
+ insn_stack[env->cfg.cur_stack++] = w;
return 1;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
verbose_linfo(env, t, "%d: ", t);
{
struct bpf_insn *insns = env->prog->insnsi;
int insn_cnt = env->prog->len;
+ int *insn_stack, *insn_state;
int ret = 0;
int i, t;
- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_state)
return -ENOMEM;
- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_stack) {
- kfree(insn_state);
+ kvfree(insn_state);
return -ENOMEM;
}
insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
insn_stack[0] = 0; /* 0 is the first instruction */
- cur_stack = 1;
+ env->cfg.cur_stack = 1;
peek_stack:
- if (cur_stack == 0)
+ if (env->cfg.cur_stack == 0)
goto check_state;
- t = insn_stack[cur_stack - 1];
+ t = insn_stack[env->cfg.cur_stack - 1];
if (BPF_CLASS(insns[t].code) == BPF_JMP ||
BPF_CLASS(insns[t].code) == BPF_JMP32) {
mark_explored:
insn_state[t] = EXPLORED;
- if (cur_stack-- <= 0) {
+ if (env->cfg.cur_stack-- <= 0) {
verbose(env, "pop stack internal bug\n");
ret = -EFAULT;
goto err_free;
ret = 0; /* cfg looks good */
err_free:
- kfree(insn_state);
- kfree(insn_stack);
+ kvfree(insn_state);
+ kvfree(insn_stack);
+ env->cfg.insn_state = env->cfg.insn_stack = NULL;
return ret;
}
return true;
}
+static int propagate_liveness_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ struct bpf_reg_state *parent_reg)
+{
+ int err;
+ /* Propagate a read mark from reg to parent_reg. Nothing is done
+  * when parent_reg already carries REG_LIVE_READ or reg does not
+  * carry it; otherwise mark_reg_read() is invoked with parent_reg as
+  * the parent. Returns 0, or the error from mark_reg_read().
+  */
+ if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
+ return 0;
+
+ err = mark_reg_read(env, reg, parent_reg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
/* A write screens off any subsequent reads; but write marks come from the
* straight-line code between a state and its parent. When we arrive at an
* equivalent state (jump target or such) we didn't arrive by the straight-line
const struct bpf_verifier_state *vstate,
struct bpf_verifier_state *vparent)
{
- int i, frame, err = 0;
+ struct bpf_reg_state *state_reg, *parent_reg;
struct bpf_func_state *state, *parent;
+ int i, frame, err = 0;
if (vparent->curframe != vstate->curframe) {
WARN(1, "propagate_live: parent frame %d current frame %d\n",
/* Propagate read liveness of registers... */
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
for (frame = 0; frame <= vstate->curframe; frame++) {
+ parent = vparent->frame[frame];
+ state = vstate->frame[frame];
+ parent_reg = parent->regs;
+ state_reg = state->regs;
/* We don't need to worry about FP liveness, it's read-only */
for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
- if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
- continue;
- if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
- err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
- &vparent->frame[frame]->regs[i]);
- if (err)
- return err;
- }
+ err = propagate_liveness_reg(env, &state_reg[i],
+ &parent_reg[i]);
+ if (err)
+ return err;
}
- }
- /* ... and stack slots */
- for (frame = 0; frame <= vstate->curframe; frame++) {
- state = vstate->frame[frame];
- parent = vparent->frame[frame];
+ /* Propagate stack slots. */
for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
i < parent->allocated_stack / BPF_REG_SIZE; i++) {
- if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
- continue;
- if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
- mark_reg_read(env, &state->stack[i].spilled_ptr,
- &parent->stack[i].spilled_ptr);
+ parent_reg = &parent->stack[i].spilled_ptr;
+ state_reg = &state->stack[i].spilled_ptr;
+ err = propagate_liveness_reg(env, state_reg,
+ parent_reg);
+ if (err)
+ return err;
}
}
return err;
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
- struct bpf_verifier_state_list *sl;
+ struct bpf_verifier_state_list *sl, **pprev;
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
- sl = env->explored_states[insn_idx];
+ pprev = &env->explored_states[insn_idx];
+ sl = *pprev;
+
if (!sl)
/* this 'insn_idx' instruction wasn't marked, so we will not
* be doing state search here
while (sl != STATE_LIST_MARK) {
if (states_equal(env, &sl->state, cur)) {
+ sl->hit_cnt++;
/* reached equivalent register/stack state,
* prune the search.
* Registers read by the continuation are read by us.
return err;
return 1;
}
- sl = sl->next;
states_cnt++;
+ sl->miss_cnt++;
+ /* heuristic to determine whether this state is beneficial
+ * to keep checking from state equivalence point of view.
+ * Higher numbers increase max_states_per_insn and verification time,
+ * but do not meaningfully decrease insn_processed.
+ */
+ if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
+ /* the state is unlikely to be useful. Remove it to
+ * speed up verification
+ */
+ *pprev = sl->next;
+ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->peak_states--;
+ } else {
+ /* cannot free this state, since the parentage chain may
+ * walk it later. Add it to free_list instead, to
+ * be freed at the end of verification
+ */
+ sl->next = env->free_list;
+ env->free_list = sl;
+ }
+ sl = *pprev;
+ continue;
+ }
+ pprev = &sl->next;
+ sl = *pprev;
}
+ if (env->max_states_per_insn < states_cnt)
+ env->max_states_per_insn = states_cnt;
+
if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
return 0;
new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
if (!new_sl)
return -ENOMEM;
+ env->total_states++;
+ env->peak_states++;
/* add new state to the head of linked list */
new = &new_sl->state;
struct bpf_verifier_state *state;
struct bpf_insn *insns = env->prog->insnsi;
struct bpf_reg_state *regs;
- int insn_cnt = env->prog->len, i;
- int insn_processed = 0;
+ int insn_cnt = env->prog->len;
bool do_print_state = false;
env->prev_linfo = NULL;
insn = &insns[env->insn_idx];
class = BPF_CLASS(insn->code);
- if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
+ if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
verbose(env,
"BPF program is too large. Processed %d insn\n",
- insn_processed);
+ env->insn_processed);
return -E2BIG;
}
return err;
if (err == 1) {
/* found equivalent state, can prune the search */
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
if (do_print_state)
verbose(env, "\nfrom %d to %d%s: safe\n",
env->prev_insn_idx, env->insn_idx,
if (need_resched())
cond_resched();
- if (env->log.level > 1 || (env->log.level && do_print_state)) {
- if (env->log.level > 1)
+ if (env->log.level & BPF_LOG_LEVEL2 ||
+ (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
+ if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "%d:", env->insn_idx);
else
verbose(env, "\nfrom %d to %d%s:",
do_print_state = false;
}
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
const struct bpf_insn_cbs cbs = {
.cb_print = verbose,
.private_data = env,
env->insn_idx++;
}
- verbose(env, "processed %d insns (limit %d), stack depth ",
- insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
- for (i = 0; i < env->subprog_cnt; i++) {
- u32 depth = env->subprog_info[i].stack_depth;
-
- verbose(env, "%d", depth);
- if (i + 1 < env->subprog_cnt)
- verbose(env, "+");
- }
- verbose(env, "\n");
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
return 0;
}
}
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+ struct bpf_insn_aux_data *aux;
struct bpf_map *map;
struct fd f;
+ u64 addr;
if (i == insn_cnt - 1 || insn[1].code != 0 ||
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
return -EINVAL;
}
- if (insn->src_reg == 0)
+ if (insn[0].src_reg == 0)
/* valid generic load 64-bit imm */
goto next_insn;
- if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
- insn[1].imm != 0) {
- verbose(env, "unrecognized bpf_ld_imm64 insn\n");
+ /* In final convert_pseudo_ld_imm64() step, this is
+ * converted into regular 64-bit imm load insn.
+ */
+ if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
+ insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
+ (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
+ insn[1].imm != 0)) {
+ verbose(env,
+ "unrecognized bpf_ld_imm64 insn\n");
return -EINVAL;
}
return err;
}
- /* store map pointer inside BPF_LD_IMM64 instruction */
- insn[0].imm = (u32) (unsigned long) map;
- insn[1].imm = ((u64) (unsigned long) map) >> 32;
+ aux = &env->insn_aux_data[i];
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+ addr = (unsigned long)map;
+ } else {
+ u32 off = insn[1].imm;
+
+ if (off >= BPF_MAX_VAR_OFF) {
+ verbose(env, "direct value offset of %u is not allowed\n", off);
+ fdput(f);
+ return -EINVAL;
+ }
+
+ if (!map->ops->map_direct_value_addr) {
+ verbose(env, "no direct value access support for this map type\n");
+ fdput(f);
+ return -EINVAL;
+ }
+
+ err = map->ops->map_direct_value_addr(map, &addr, off);
+ if (err) {
+ verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
+ map->value_size, off);
+ fdput(f);
+ return err;
+ }
+
+ aux->map_off = off;
+ addr += off;
+ }
+
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
/* check whether we recorded this map already */
- for (j = 0; j < env->used_map_cnt; j++)
+ for (j = 0; j < env->used_map_cnt; j++) {
if (env->used_maps[j] == map) {
+ aux->map_index = j;
fdput(f);
goto next_insn;
}
+ }
if (env->used_map_cnt >= MAX_USED_MAPS) {
fdput(f);
fdput(f);
return PTR_ERR(map);
}
+
+ aux->map_index = env->used_map_cnt;
env->used_maps[env->used_map_cnt++] = map;
if (bpf_map_is_cgroup_storage(map) &&
struct bpf_prog *new_prog;
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
- if (!new_prog)
+ if (IS_ERR(new_prog)) {
+ if (PTR_ERR(new_prog) == -ERANGE)
+ verbose(env,
+ "insn %d cannot be patched due to 16-bit range\n",
+ env->insn_aux_data[off].orig_idx);
return NULL;
+ }
if (adjust_insn_aux_data(env, new_prog->len, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
insn->src_reg != BPF_PSEUDO_CALL)
continue;
subprog = insn->off;
- insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
- func[subprog]->bpf_func -
- __bpf_call_base;
+ insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
+ __bpf_call_base;
}
/* we use the aux data to keep a list of the start addresses
struct bpf_verifier_state_list *sl, *sln;
int i;
+ sl = env->free_list;
+ while (sl) {
+ sln = sl->next;
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ sl = sln;
+ }
+
if (!env->explored_states)
return;
}
}
- kfree(env->explored_states);
+ kvfree(env->explored_states);
+}
+
+static void print_verification_stats(struct bpf_verifier_env *env)
+{
+ int i;
+
+ if (env->log.level & BPF_LOG_STATS) {
+ verbose(env, "verification time %lld usec\n",
+ div_u64(env->verification_time, 1000));
+ verbose(env, "stack depth ");
+ for (i = 0; i < env->subprog_cnt; i++) {
+ u32 depth = env->subprog_info[i].stack_depth;
+
+ verbose(env, "%d", depth);
+ if (i + 1 < env->subprog_cnt)
+ verbose(env, "+");
+ }
+ verbose(env, "\n");
+ }
+ verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
+ "total_states %d peak_states %d mark_read %d\n",
+ env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
+ env->max_states_per_insn, env->total_states,
+ env->peak_states, env->longest_mark_read_walk);
}
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
union bpf_attr __user *uattr)
{
+ u64 start_time = ktime_get_ns();
struct bpf_verifier_env *env;
struct bpf_verifier_log *log;
int i, len, ret = -EINVAL;
env->insn_aux_data[i].orig_idx = i;
env->prog = *prog;
env->ops = bpf_verifier_ops[env->prog->type];
+ is_priv = capable(CAP_SYS_ADMIN);
/* grab the mutex to protect few globals used by verifier */
- mutex_lock(&bpf_verifier_lock);
+ if (!is_priv)
+ mutex_lock(&bpf_verifier_lock);
if (attr->log_level || attr->log_buf || attr->log_size) {
/* user requested verbose verifier output
ret = -EINVAL;
/* log attributes have to be sane */
- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
- !log->level || !log->ubuf)
+ if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
+ !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
goto err_unlock;
}
if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
env->strict_alignment = false;
- is_priv = capable(CAP_SYS_ADMIN);
env->allow_ptr_leaks = is_priv;
ret = replace_map_fd_with_map_ptr(env);
goto skip_full_check;
}
- env->explored_states = kcalloc(env->prog->len,
+ env->explored_states = kvcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
if (ret == 0)
ret = fixup_call_args(env);
+ env->verification_time = ktime_get_ns() - start_time;
+ print_verification_stats(env);
+
if (log->level && bpf_verifier_log_full(log))
ret = -ENOSPC;
if (log->level && !log->ubuf) {
release_maps(env);
*prog = env->prog;
err_unlock:
- mutex_unlock(&bpf_verifier_lock);
+ if (!is_priv)
+ mutex_unlock(&bpf_verifier_lock);
vfree(env->insn_aux_data);
err_free_env:
kfree(env);
#ifdef CONFIG_STACKTRACE
entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
entry->stacktrace.entries = entry->st_entries;
- entry->stacktrace.skip = 2;
+ entry->stacktrace.skip = 1;
save_stack_trace(&entry->stacktrace);
#endif
event->pmu->del(event, 0);
event->oncpu = -1;
- if (event->pending_disable) {
- event->pending_disable = 0;
+ if (READ_ONCE(event->pending_disable) >= 0) {
+ WRITE_ONCE(event->pending_disable, -1);
state = PERF_EVENT_STATE_OFF;
}
perf_event_set_state(event, state);
void perf_event_disable_inatomic(struct perf_event *event)
{
- event->pending_disable = 1;
+ WRITE_ONCE(event->pending_disable, smp_processor_id());
+ /* can fail, see perf_pending_event_disable() */
irq_work_queue(&event->pending);
}
}
}
+static void perf_pending_event_disable(struct perf_event *event)
+{
+ int cpu = READ_ONCE(event->pending_disable);
+
+ if (cpu < 0)
+ return;
+
+ if (cpu == smp_processor_id()) {
+ WRITE_ONCE(event->pending_disable, -1);
+ perf_event_disable_local(event);
+ return;
+ }
+
+ /*
+ * CPU-A CPU-B
+ *
+ * perf_event_disable_inatomic()
+ * @pending_disable = CPU-A;
+ * irq_work_queue();
+ *
+ * sched-out
+ * @pending_disable = -1;
+ *
+ * sched-in
+ * perf_event_disable_inatomic()
+ * @pending_disable = CPU-B;
+ * irq_work_queue(); // FAILS
+ *
+ * irq_work_run()
+ * perf_pending_event()
+ *
+ * But the event runs on CPU-B and wants disabling there.
+ */
+ irq_work_queue_on(&event->pending, cpu);
+}
+
static void perf_pending_event(struct irq_work *entry)
{
- struct perf_event *event = container_of(entry,
- struct perf_event, pending);
+ struct perf_event *event = container_of(entry, struct perf_event, pending);
int rctx;
rctx = perf_swevent_get_recursion_context();
* and we won't recurse 'further'.
*/
- if (event->pending_disable) {
- event->pending_disable = 0;
- perf_event_disable_local(event);
- }
+ perf_pending_event_disable(event);
if (event->pending_wakeup) {
event->pending_wakeup = 0;
if (task == TASK_TOMBSTONE)
return;
- if (!ifh->nr_file_filters)
- return;
-
- mm = get_task_mm(event->ctx->task);
- if (!mm)
- goto restart;
+ if (ifh->nr_file_filters) {
+ mm = get_task_mm(event->ctx->task);
+ if (!mm)
+ goto restart;
- down_read(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ }
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- event->addr_filter_ranges[count].start = 0;
- event->addr_filter_ranges[count].size = 0;
+ if (filter->path.dentry) {
+ /*
+ * Adjust base offset if the filter is associated to a
+ * binary that needs to be mapped:
+ */
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
- /*
- * Adjust base offset if the filter is associated to a binary
- * that needs to be mapped:
- */
- if (filter->path.dentry)
perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
+ } else {
+ event->addr_filter_ranges[count].start = filter->offset;
+ event->addr_filter_ranges[count].size = filter->size;
+ }
count++;
}
event->addr_filters_gen++;
raw_spin_unlock_irqrestore(&ifh->lock, flags);
- up_read(&mm->mmap_sem);
+ if (ifh->nr_file_filters) {
+ up_read(&mm->mmap_sem);
- mmput(mm);
+ mmput(mm);
+ }
restart:
perf_event_stop(event, 1);
init_waitqueue_head(&event->waitq);
+ event->pending_disable = -1;
init_irq_work(&event->pending, perf_pending_event);
mutex_init(&event->mmap_mutex);
* store that will be enabled on successful return
*/
if (!handle->size) { /* A, matches D */
- event->pending_disable = 1;
+ event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
local_set(&rb->aux_nest, 0);
goto err_put;
rb->aux_head += size;
}
- if (size || handle->aux_flags) {
- /*
- * Only send RECORD_AUX if we have something useful to communicate
- *
- * Note: the OVERWRITE records by themselves are not considered
- * useful, as they don't communicate any *new* information,
- * aside from the short-lived offset, that becomes history at
- * the next event sched-in and therefore isn't useful.
- * The userspace that needs to copy out AUX data in overwrite
- * mode should know to use user_page::aux_head for the actual
- * offset. So, from now on we don't output AUX records that
- * have *only* OVERWRITE flag set.
- */
-
- if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)
- perf_event_aux_event(handle->event, aux_head, size,
- handle->aux_flags);
- }
+ /*
+ * Only send RECORD_AUX if we have something useful to communicate
+ *
+ * Note: the OVERWRITE records by themselves are not considered
+ * useful, as they don't communicate any *new* information,
+ * aside from the short-lived offset, that becomes history at
+ * the next event sched-in and therefore isn't useful.
+ * The userspace that needs to copy out AUX data in overwrite
+ * mode should know to use user_page::aux_head for the actual
+ * offset. So, from now on we don't output AUX records that
+ * have *only* OVERWRITE flag set.
+ */
+ if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
+ perf_event_aux_event(handle->event, aux_head, size,
+ handle->aux_flags);
rb->user_page->aux_head = rb->aux_head;
if (rb_need_aux_wakeup(rb))
if (wakeup) {
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
- handle->event->pending_disable = 1;
+ handle->event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
}
int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
{
data = data->parent_data;
+
+ if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE)
+ return 0;
+
if (data->chip->irq_set_wake)
return data->chip->irq_set_wake(data, on);
alloc_masks(&desc[i], node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
+ mutex_init(&desc[i].request_mutex);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
}
return arch_early_irq_init();
static int reuse_unused_kprobe(struct kprobe *ap)
{
struct optimized_kprobe *op;
- int ret;
/*
* Unused kprobe MUST be on the way of delayed unoptimizing (means
/* Enable the probe again */
ap->flags &= ~KPROBE_FLAG_DISABLED;
/* Optimize it again (remove from op->list) */
- ret = kprobe_optready(ap);
- if (ret)
- return ret;
+ if (!kprobe_optready(ap))
+ return -EINVAL;
optimize_kprobe(ap);
return 0;
return;
raw_local_irq_save(flags);
- if (!graph_lock())
- goto out_irq;
+ arch_spin_lock(&lockdep_lock);
+ current->lockdep_recursion = 1;
/* closed head */
pf = delayed_free.pf + (delayed_free.index ^ 1);
*/
call_rcu_zapped(delayed_free.pf + delayed_free.index);
- graph_unlock();
-out_irq:
+ current->lockdep_recursion = 0;
+ arch_spin_unlock(&lockdep_lock);
raw_local_irq_restore(flags);
}
{
struct pending_free *pf;
unsigned long flags;
- int locked;
init_data_structures_once();
raw_local_irq_save(flags);
- locked = graph_lock();
- if (!locked)
- goto out_irq;
-
+ arch_spin_lock(&lockdep_lock);
+ current->lockdep_recursion = 1;
pf = get_pending_free();
__lockdep_free_key_range(pf, start, size);
call_rcu_zapped(pf);
-
- graph_unlock();
-out_irq:
+ current->lockdep_recursion = 0;
+ arch_spin_unlock(&lockdep_lock);
raw_local_irq_restore(flags);
/*
if (dl_entity_is_special(dl_se))
return;
- WARN_ON(hrtimer_active(&dl_se->inactive_timer));
WARN_ON(dl_se->dl_non_contending);
zerolag_time = dl_se->deadline -
* If the "0-lag time" already passed, decrease the active
* utilization now, instead of starting a timer
*/
- if (zerolag_time < 0) {
+ if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
if (dl_task(p))
sub_running_bw(dl_se, dl_rq);
if (!dl_task(p) || p->state == TASK_DEAD) {
return HRTIMER_NORESTART;
}
+extern const u64 max_cfs_quota_period;
+
static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
unsigned long flags;
int overrun;
int idle = 0;
+ int count = 0;
raw_spin_lock_irqsave(&cfs_b->lock, flags);
for (;;) {
if (!overrun)
break;
+ if (++count > 3) {
+ u64 new, old = ktime_to_ns(cfs_b->period);
+
+ new = (old * 147) / 128; /* ~115% */
+ new = min(new, max_cfs_quota_period);
+
+ cfs_b->period = ns_to_ktime(new);
+
+ /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
+ cfs_b->quota *= new;
+ cfs_b->quota = div64_u64(cfs_b->quota, old);
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+
+ /* reset count so we don't come right back in here */
+ count = 0;
+ }
+
idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
}
if (idle)
if (cfs_rq->last_h_load_update == now)
return;
- cfs_rq->h_load_next = NULL;
+ WRITE_ONCE(cfs_rq->h_load_next, NULL);
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- cfs_rq->h_load_next = se;
+ WRITE_ONCE(cfs_rq->h_load_next, se);
if (cfs_rq->last_h_load_update == now)
break;
}
cfs_rq->last_h_load_update = now;
}
- while ((se = cfs_rq->h_load_next) != NULL) {
+ while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) {
load = cfs_rq->h_load;
load = div64_ul(load * se->avg.load_avg,
cfs_rq_load_avg(cfs_rq) + 1);
sd->nr = syscall_get_nr(task, regs);
sd->arch = syscall_get_arch();
- syscall_get_arguments(task, regs, 0, 6, args);
+ syscall_get_arguments(task, regs, args);
sd->args[0] = args[0];
sd->args[1] = args[1];
sd->args[2] = args[2];
if (flags)
return -EINVAL;
- f = fdget_raw(pidfd);
+ f = fdget(pidfd);
if (!f.file)
return -EBADF;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
+static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
.maxlen = sizeof(files_stat.max_files),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
+ .extra1 = &zero_ul,
.extra2 = &long_max,
},
{
return -ENOSYS;
}
+int proc_do_large_bitmap(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
#endif /* CONFIG_PROC_SYSCTL */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
+EXPORT_SYMBOL(proc_do_large_bitmap);
{
struct alarm *alarm = &timr->it.alarm.alarmtimer;
- return ktime_sub(now, alarm->node.expires);
+ return ktime_sub(alarm->node.expires, now);
}
/**
return cd.read_data[seq & 1].epoch_cyc;
}
-static int sched_clock_suspend(void)
+int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
return 0;
}
-static void sched_clock_resume(void)
+void sched_clock_resume(void)
{
struct clock_read_data *rd = &cd.read_data[0];
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), true);
system_state = SYSTEM_SUSPEND;
+ sched_clock_suspend();
timekeeping_suspend();
} else {
tick_suspend_local();
if (tick_freeze_depth == num_online_cpus()) {
timekeeping_resume();
+ sched_clock_resume();
system_state = SYSTEM_RUNNING;
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
}
EXPORT_SYMBOL(jiffies64_to_nsecs);
+u64 jiffies64_to_msecs(const u64 j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (MSEC_PER_SEC / HZ) * j;
+#else
+ return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
+#endif
+}
+EXPORT_SYMBOL(jiffies64_to_msecs);
+
/**
* nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
extern void timekeeping_warp_clock(void);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+extern int sched_clock_suspend(void);
+extern void sched_clock_resume(void);
+#else
+static inline int sched_clock_suspend(void) { return 0; }
+static inline void sched_clock_resume(void) { }
+#endif
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
case BPF_FUNC_ktime_get_ns:
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
+#include <linux/kprobes.h>
#include <trace/events/sched.h>
tr->ops->func = ftrace_stub;
}
-static inline void
+static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
{
__ftrace_ops_list_func(ip, parent_ip, NULL, regs);
}
+NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
+NOKPROBE_SYMBOL(ftrace_ops_no_ops);
#endif
/*
preempt_enable_notrace();
trace_clear_recursion(bit);
}
+NOKPROBE_SYMBOL(ftrace_ops_assist_func);
/**
* ftrace_ops_get_func - get the function a trampoline should call
buf->private = 0;
}
-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
+ if (ref->ref > INT_MAX/2)
+ return false;
+
ref->ref++;
+ return true;
}
/* Pipe buffer operations for a buffer. */
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
+ unsigned long args[6];
int pc;
int syscall_nr;
int size;
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
+ unsigned long args[6];
bool valid_prog_array;
int syscall_nr;
int rctx;
return;
rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
- (unsigned long *)&rec->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
+ this_cpu);
print_modules();
print_irqtrace_events(current);
if (regs)
But it significantly improves the success of resolving
variables in gdb on optimized code.
+config DEBUG_INFO_BTF
+ bool "Generate BTF typeinfo"
+ depends on DEBUG_INFO
+ help
+ Generate deduplicated BTF type information from DWARF debug info.
+ Turning this on requires the pahole tool, which will convert
+ DWARF type info into equivalent deduplicated BTF type info.
+
config GDB_SCRIPTS
bool "Provide GDB scripts for kernel debugging"
depends on DEBUG_INFO
config ARCH_HAS_KCOV
bool
help
- KCOV does not have any arch-specific code, but currently it is enabled
- only for x86_64. KCOV requires testing on other archs, and most likely
- disabling of instrumentation for some early boot code.
+ An architecture should select this when it can successfully
+ build and run with CONFIG_KCOV. This typically requires
+ disabling instrumentation for some early boot code.
config CC_HAS_SANCOV_TRACE_PC
def_bool $(cc-option,-fsanitize-coverage=trace-pc)
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i)
{
+#ifdef CONFIG_CRYPTO
struct ahash_request *hash = hashp;
struct scatterlist sg;
size_t copied;
ahash_request_set_crypt(hash, &sg, NULL, copied);
crypto_ahash_update(hash);
return copied;
+#else
+ return 0;
+#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);
{
const unsigned char *ip = in;
unsigned char *op = out;
+ unsigned char *data_start;
size_t l = in_len;
size_t t = 0;
signed char state_offset = -2;
unsigned int m4_max_offset;
- // LZO v0 will never write 17 as first byte,
- // so this is used to version the bitstream
+ // LZO v0 will never write 17 as first byte (except for zero-length
+ // input), so this is used to version the bitstream
if (bitstream_version > 0) {
*op++ = 17;
*op++ = bitstream_version;
m4_max_offset = M4_MAX_OFFSET_V0;
}
+ data_start = op;
+
while (l > 20) {
size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
uintptr_t ll_end = (uintptr_t) ip + ll;
if (t > 0) {
const unsigned char *ii = in + in_len - t;
- if (op == out && t <= 238) {
+ if (op == data_start && t <= 238) {
*op++ = (17 + t);
} else if (t <= 3) {
op[state_offset] |= t;
if (unlikely(in_len < 3))
goto input_overrun;
- if (likely(*ip == 17)) {
+ if (likely(in_len >= 5) && likely(*ip == 17)) {
bitstream_version = ip[1];
ip += 2;
- if (unlikely(in_len < 5))
- goto input_overrun;
} else {
bitstream_version = 0;
}
return 1;
if (unlikely(tbl->nest))
return 1;
- return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]);
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
int i;
static struct lock_class_key __key;
- size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
size = nbuckets;
struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
int err = -EAGAIN;
struct rhash_head *head, *next, *entry;
- struct rhash_head **pprev = NULL;
+ struct rhash_head __rcu **pprev = NULL;
unsigned int new_hash;
if (new_tbl->nest)
err = -ENOENT;
- rht_for_each_from(entry, rht_ptr(*bkt), old_tbl, old_hash) {
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
- head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash],
- new_tbl, new_hash));
+ head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
RCU_INIT_POINTER(entry->next, head);
rcu_assign_pointer(*pprev, next);
else
/* Need to preserved the bit lock. */
- rcu_assign_pointer(*bkt, rht_ptr_locked(next));
+ rht_assign_locked(bkt, next);
out:
return err;
.ht = ht,
.key = key,
};
- struct rhash_head **pprev = NULL;
+ struct rhash_head __rcu **pprev = NULL;
struct rhash_head *head;
int elasticity;
elasticity = RHT_ELASTICITY;
- rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
struct rhlist_head *plist;
rcu_assign_pointer(*pprev, obj);
else
/* Need to preserve the bit lock */
- rcu_assign_pointer(*bkt, rht_ptr_locked(obj));
+ rht_assign_locked(bkt, obj);
return NULL;
}
if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN);
- head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
/* bkt is always the head of the list, so it holds
* the lock, which we need to preserve
*/
- rcu_assign_pointer(*bkt, rht_ptr_locked(obj));
+ rht_assign_locked(bkt, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
struct rhash_head *pos, *next;
cond_resched();
- for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)),
+ for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos);
EXPORT_SYMBOL(memcmp);
#endif
+#ifndef __HAVE_ARCH_BCMP
+/**
+ * bcmp - returns 0 if and only if the buffers have identical contents.
+ * @a: pointer to first buffer.
+ * @b: pointer to second buffer.
+ * @len: size of buffers.
+ *
+ * The sign or magnitude of a non-zero return value has no particular
+ * meaning, and architectures may implement their own more efficient bcmp(). So
+ * while this particular implementation is a simple (tail) call to memcmp, do
+ * not rely on anything but whether the return value is zero or non-zero.
+ */
+#undef bcmp
+int bcmp(const void *a, const void *b, size_t len)
+{
+ return memcmp(a, b, len);
+}
+EXPORT_SYMBOL(bcmp);
+#endif
+
#ifndef __HAVE_ARCH_MEMSCAN
/**
* memscan - Find a character in an area of memory.
#include <linux/export.h>
#include <asm/syscall.h>
-static int collect_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc)
+static int collect_syscall(struct task_struct *target, struct syscall_info *info)
{
struct pt_regs *regs;
if (!try_get_task_stack(target)) {
/* Task has no stack, so the task isn't in a syscall. */
- *sp = *pc = 0;
- *callno = -1;
+ memset(info, 0, sizeof(*info));
+ info->data.nr = -1;
return 0;
}
return -EAGAIN;
}
- *sp = user_stack_pointer(regs);
- *pc = instruction_pointer(regs);
+ info->sp = user_stack_pointer(regs);
+ info->data.instruction_pointer = instruction_pointer(regs);
- *callno = syscall_get_nr(target, regs);
- if (*callno != -1L && maxargs > 0)
- syscall_get_arguments(target, regs, 0, maxargs, args);
+ info->data.nr = syscall_get_nr(target, regs);
+ if (info->data.nr != -1L)
+ syscall_get_arguments(target, regs,
+ (unsigned long *)&info->data.args[0]);
put_task_stack(target);
return 0;
/**
* task_current_syscall - Discover what a blocked task is doing.
* @target: thread to examine
- * @callno: filled with system call number or -1
- * @args: filled with @maxargs system call arguments
- * @maxargs: number of elements in @args to fill
- * @sp: filled with user stack pointer
- * @pc: filled with user PC
+ * @info: structure with the following fields:
+ * .sp - filled with user stack pointer
+ * .data.nr - filled with system call number or -1
+ * .data.args - filled with the system call arguments
+ * .data.instruction_pointer - filled with user PC
*
- * If @target is blocked in a system call, returns zero with *@callno
- * set to the the call's number and @args filled in with its arguments.
- * Registers not used for system call arguments may not be available and
- * it is not kosher to use &struct user_regset calls while the system
+ * If @target is blocked in a system call, returns zero with @info.data.nr
+ * set to the call's number and @info.data.args filled in with its
+ * arguments. Registers not used for system call arguments may not be available
+ * and it is not kosher to use &struct user_regset calls while the system
* call is still in progress. Note we may get this result if @target
* has finished its system call but not yet returned to user mode, such
* as when it's stopped for signal handling or syscall exit tracing.
*
* If @target is blocked in the kernel during a fault or exception,
- * returns zero with *@callno set to -1 and does not fill in @args.
- * If so, it's now safe to examine @target using &struct user_regset
- * get() calls as long as we're sure @target won't return to user mode.
+ * returns zero with @info.data.nr set to -1 and does not fill in
+ * @info.data.args. If so, it's now safe to examine @target using
+ * &struct user_regset get() calls as long as we're sure @target won't return
+ * to user mode.
*
* Returns -%EAGAIN if @target does not remain blocked.
- *
- * Returns -%EINVAL if @maxargs is too large (maximum is six).
*/
-int task_current_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc)
+int task_current_syscall(struct task_struct *target, struct syscall_info *info)
{
long state;
unsigned long ncsw;
- if (unlikely(maxargs > 6))
- return -EINVAL;
-
if (target == current)
- return collect_syscall(target, callno, args, maxargs, sp, pc);
+ return collect_syscall(target, info);
state = target->state;
if (unlikely(!state))
ncsw = wait_task_inactive(target, state);
if (unlikely(!ncsw) ||
- unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
+ unlikely(collect_syscall(target, info)) ||
unlikely(wait_task_inactive(target, state) != ncsw))
return -EAGAIN;
struct rhash_head *pos, *next;
struct test_obj_rhl *p;
- pos = rht_ptr(rht_dereference(tbl->buckets[i], ht));
+ pos = rht_ptr_exclusive(tbl->buckets + i);
next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
if (!rht_is_a_nulls(pos)) {
goto retry;
}
- if (flags & FOLL_GET)
- get_page(page);
+ if (flags & FOLL_GET) {
+ if (unlikely(!try_get_page(page))) {
+ page = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ }
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
- get_page(page);
+ if (unlikely(!try_get_page(page))) {
+ spin_unlock(ptl);
+ return ERR_PTR(-ENOMEM);
+ }
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
if (is_device_public_page(*page))
goto unmap;
}
- get_page(*page);
+ if (unlikely(!try_get_page(*page))) {
+ ret = -ENOMEM;
+ goto unmap;
+ }
out:
ret = 0;
unmap:
}
}
+/*
+ * Return the compound head page with ref appropriately incremented,
+ * or NULL if that failed.
+ */
+static inline struct page *try_get_compound_head(struct page *page, int refs)
+{
+ struct page *head = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(head) < 0))
+ return NULL;
+ if (unlikely(!page_cache_add_speculative(head, refs)))
+ return NULL;
+ return head;
+}
+
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ head = try_get_compound_head(page, 1);
+ if (!head)
goto pte_unmap;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pmd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pmd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pud_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pud_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pgd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pgd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
spinlock_t *ptl;
ptl = pmd_lock(mm, pmd);
+ if (!pmd_none(*pmd)) {
+ if (write) {
+ if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
+ goto out_unlock;
+ }
+ entry = pmd_mkyoung(*pmd);
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+ update_mmu_cache_pmd(vma, addr, pmd);
+ }
+
+ goto out_unlock;
+ }
+
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pmd_mkdevmap(entry);
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm_inc_nr_ptes(mm);
+ pgtable = NULL;
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
+
+out_unlock:
spin_unlock(ptl);
+ if (pgtable)
+ pte_free(mm, pgtable);
}
vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pud_lock(mm, pud);
+ if (!pud_none(*pud)) {
+ if (write) {
+ if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pud(*pud));
+ goto out_unlock;
+ }
+ entry = pud_mkyoung(*pud);
+ entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+ if (pudp_set_access_flags(vma, addr, pud, entry, 1))
+ update_mmu_cache_pud(vma, addr, pud);
+ }
+ goto out_unlock;
+ }
+
entry = pud_mkhuge(pfn_t_pud(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pud_mkdevmap(entry);
}
set_pud_at(mm, addr, pud, entry);
update_mmu_cache_pud(vma, addr, pud);
+
+out_unlock:
spin_unlock(ptl);
}
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));
+
+ /*
+ * Instead of doing 'try_get_page()' below in the same_page
+ * loop, just check the count once here.
+ */
+ if (unlikely(page_count(page) <= 0)) {
+ if (pages) {
+ spin_unlock(ptl);
+ remainder = 0;
+ err = -ENOMEM;
+ break;
+ }
+ }
same_page:
if (pages) {
pages[i] = mem_map_offset(page, pfn_offset);
/*
* Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency.
*/
+#ifdef CONFIG_SMP
static void scan_large_block(void *start, void *end)
{
void *next;
cond_resched();
}
}
+#endif
/*
* Scan a memory block corresponding to a kmemleak_object. A condition is
}
rcu_read_unlock();
- /* data/bss scanning */
- scan_large_block(_sdata, _edata);
- scan_large_block(__bss_start, __bss_stop);
- scan_large_block(__start_ro_after_init, __end_ro_after_init);
-
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
for_each_possible_cpu(i)
}
local_irq_restore(flags);
+ /* register the data/bss sections */
+ create_object((unsigned long)_sdata, _edata - _sdata,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ create_object((unsigned long)__bss_start, __bss_stop - __bss_start,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ /* only register .data..ro_after_init if not within .data */
+ if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata)
+ create_object((unsigned long)__start_ro_after_init,
+ __end_ro_after_init - __start_ro_after_init,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+
/*
* This is the point where tracking allocations is safe. Automatic
* scanning is started during the late initcall. Add the early logged
return &memcg->cgwb_domain;
}
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page().
+ */
+static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = atomic_long_read(&memcg->stat[idx]);
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+ if (x < 0)
+ x = 0;
+ return x;
+}
+
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+ *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
- *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+ *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
(1 << LRU_ACTIVE_FILE));
*pheadroom = PAGE_COUNTER_MAX;
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
+#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
vma = find_vma_prev(mm, addr, &prev);
if (vma && (vma->vm_start <= addr))
return vma;
- if (!prev || expand_stack(prev, addr))
+ /* don't alter vm_end if the coredump is running */
+ if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED)
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
return vma;
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
+ /* don't alter vm_start if the coredump is running */
+ if (!mmget_still_valid(mm))
+ return NULL;
start = vma->vm_start;
if (expand_stack(vma, addr))
return NULL;
bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
int migratetype, int flags)
{
- unsigned long pfn, iter, found;
+ unsigned long found;
+ unsigned long iter = 0;
+ unsigned long pfn = page_to_pfn(page);
+ const char *reason = "unmovable page";
/*
* TODO we could make this much more efficient by not checking every
* can still lead to having bootmem allocations in zone_movable.
*/
- /*
- * CMA allocations (alloc_contig_range) really need to mark isolate
- * CMA pageblocks even when they are not movable in fact so consider
- * them movable here.
- */
- if (is_migrate_cma(migratetype) &&
- is_migrate_cma(get_pageblock_migratetype(page)))
- return false;
+ if (is_migrate_cma_page(page)) {
+ /*
+ * CMA allocations (alloc_contig_range) really need to mark
+ * isolate CMA pageblocks even when they are not movable in fact
+ * so consider them movable here.
+ */
+ if (is_migrate_cma(migratetype))
+ return false;
+
+ reason = "CMA page";
+ goto unmovable;
+ }
- pfn = page_to_pfn(page);
- for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+ for (found = 0; iter < pageblock_nr_pages; iter++) {
unsigned long check = pfn + iter;
if (!pfn_valid_within(check))
unmovable:
WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
if (flags & REPORT_FAILURE)
- dump_page(pfn_to_page(pfn+iter), "unmovable page");
+ dump_page(pfn_to_page(pfn + iter), reason);
return true;
}
ai->groups[group].base_offset = areas[group] - base;
}
- pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
- PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
+ pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n",
+ PFN_DOWN(size_sum), ai->static_size, ai->reserved_size,
ai->dyn_size, ai->unit_size);
rc = pcpu_setup_first_chunk(ai, base);
}
/* we're ready, commit */
- pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n",
- unit_pages, psize_str, vm.addr, ai->static_size,
+ pr_info("%d %s pages/cpu s%zu r%zu d%zu\n",
+ unit_pages, psize_str, ai->static_size,
ai->reserved_size, ai->dyn_size);
rc = pcpu_setup_first_chunk(ai, vm.addr);
}
spin_unlock(&sbinfo->shrinklist_lock);
}
- if (!list_empty(&info->swaplist)) {
+ while (!list_empty(&info->swaplist)) {
+ /* Wait while shmem_unuse() is scanning this inode... */
+ wait_var_event(&info->stop_eviction,
+ !atomic_read(&info->stop_eviction));
mutex_lock(&shmem_swaplist_mutex);
- list_del_init(&info->swaplist);
+ /* ...but beware of the race if we peeked too early */
+ if (!atomic_read(&info->stop_eviction))
+ list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
}
static int shmem_find_swap_entries(struct address_space *mapping,
pgoff_t start, unsigned int nr_entries,
struct page **entries, pgoff_t *indices,
- bool frontswap)
+ unsigned int type, bool frontswap)
{
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
+ swp_entry_t entry;
unsigned int ret = 0;
if (!nr_entries)
if (!xa_is_value(page))
continue;
- if (frontswap) {
- swp_entry_t entry = radix_to_swp_entry(page);
-
- if (!frontswap_test(swap_info[swp_type(entry)],
- swp_offset(entry)))
- continue;
- }
+ entry = radix_to_swp_entry(page);
+ if (swp_type(entry) != type)
+ continue;
+ if (frontswap &&
+ !frontswap_test(swap_info[type], swp_offset(entry)))
+ continue;
indices[ret] = xas.xa_index;
entries[ret] = page;
pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
pvec.pages, indices,
- frontswap);
+ type, frontswap);
if (pvec.nr == 0) {
ret = 0;
break;
unsigned long *fs_pages_to_unuse)
{
struct shmem_inode_info *info, *next;
- struct inode *inode;
- struct inode *prev_inode = NULL;
int error = 0;
if (list_empty(&shmem_swaplist))
return 0;
mutex_lock(&shmem_swaplist_mutex);
-
- /*
- * The extra refcount on the inode is necessary to safely dereference
- * p->next after re-acquiring the lock. New shmem inodes with swap
- * get added to the end of the list and we will scan them all.
- */
list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
if (!info->swapped) {
list_del_init(&info->swaplist);
continue;
}
-
- inode = igrab(&info->vfs_inode);
- if (!inode)
- continue;
-
+ /*
+ * Drop the swaplist mutex while searching the inode for swap;
+ * but before doing so, make sure shmem_evict_inode() will not
+ * remove placeholder inode from swaplist, nor let it be freed
+ * (igrab() would protect from unlink, but not from unmount).
+ */
+ atomic_inc(&info->stop_eviction);
mutex_unlock(&shmem_swaplist_mutex);
- if (prev_inode)
- iput(prev_inode);
- prev_inode = inode;
- error = shmem_unuse_inode(inode, type, frontswap,
+ error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
fs_pages_to_unuse);
cond_resched();
next = list_next_entry(info, swaplist);
if (!info->swapped)
list_del_init(&info->swaplist);
+ if (atomic_dec_and_test(&info->stop_eviction))
+ wake_up_var(&info->stop_eviction);
if (error)
break;
}
mutex_unlock(&shmem_swaplist_mutex);
- if (prev_inode)
- iput(prev_inode);
-
return error;
}
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
+ atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->shrinklist);
/* Slab management obj is off-slab. */
freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
- freelist = kasan_reset_tag(freelist);
if (!freelist)
return NULL;
} else {
static int leaks_show(struct seq_file *m, void *p)
{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
+ struct kmem_cache *cachep = list_entry(p, struct kmem_cache,
+ root_caches_node);
struct page *page;
struct kmem_cache_node *n;
const char *name;
* If the boolean frontswap is true, only unuse pages_to_unuse pages;
* pages_to_unuse==0 means all pages; ignored if frontswap is false
*/
-#define SWAP_UNUSE_MAX_TRIES 3
int try_to_unuse(unsigned int type, bool frontswap,
unsigned long pages_to_unuse)
{
struct page *page;
swp_entry_t entry;
unsigned int i;
- int retries = 0;
if (!si->inuse_pages)
return 0;
spin_lock(&mmlist_lock);
p = &init_mm.mmlist;
- while ((p = p->next) != &init_mm.mmlist) {
- if (signal_pending(current)) {
- retval = -EINTR;
- break;
- }
+ while (si->inuse_pages &&
+ !signal_pending(current) &&
+ (p = p->next) != &init_mm.mmlist) {
mm = list_entry(p, struct mm_struct, mmlist);
if (!mmget_not_zero(mm))
mmput(prev_mm);
i = 0;
- while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
+ while (si->inuse_pages &&
+ !signal_pending(current) &&
+ (i = find_next_to_unuse(si, i, frontswap)) != 0) {
entry = swp_entry(type, i);
page = find_get_page(swap_address_space(entry), i);
* If yes, we would need to do retry the unuse logic again.
* Under global memory pressure, swap entries can be reinserted back
* into process space after the mmlist loop above passes over them.
- * Its not worth continuosuly retrying to unuse the swap in this case.
- * So we try SWAP_UNUSE_MAX_TRIES times.
+ *
+ * Limit the number of retries? No: when mmget_not_zero() above fails,
+ * that mm is likely to be freeing swap from exit_mmap(), which proceeds
+ * at its own independent pace; and even shmem_writepage() could have
+ * been preempted after get_swap_page(), temporarily hiding that swap.
+ * It's easy and robust (though cpu-intensive) just to keep retrying.
*/
- if (++retries >= SWAP_UNUSE_MAX_TRIES)
- retval = -EBUSY;
- else if (si->inuse_pages)
- goto retry;
-
+ if (si->inuse_pages) {
+ if (!signal_pending(current))
+ goto retry;
+ retval = -EINTR;
+ }
out:
return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
}
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
*
- * Return: newly allocated copy of @s or %NULL in case of error
+ * Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
char *strndup_user(const char __user *s, long n)
{
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct mem_cgroup *memcg,
struct scan_control *sc, bool actual_reclaim)
{
enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
/*
* When refaults are being observed, it means a new workingset
* is being established. Disable active list protection to get
* rid of the stale workingset quickly.
*/
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
if (file && actual_reclaim && lruvec->refaults != refaults) {
inactive_ratio = 0;
} else {
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct lruvec *lruvec, struct mem_cgroup *memcg,
- struct scan_control *sc)
+ struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru),
- memcg, sc, true))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
* anonymous pages on the LRU in eligible zones.
* Otherwise, the small LRU gets thrashed.
*/
- if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, false, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
>> sc->priority) {
scan_balance = SCAN_ANON;
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, true, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- lruvec, memcg, sc);
+ lruvec, sc);
}
}
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
unsigned long refaults;
struct lruvec *lruvec;
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
lruvec->refaults = refaults;
} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
}
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
"nr_tlb_remote_flush",
"nr_tlb_remote_flush_received",
-#else
- "", /* nr_tlb_remote_flush */
- "", /* nr_tlb_remote_flush_received */
-#endif /* CONFIG_SMP */
"nr_tlb_local_flush_all",
"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */
#include "nhc.h"
static struct rb_root rb_root = RB_ROOT;
-static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX];
+static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1];
static DEFINE_SPINLOCK(lowpan_nhc_lock);
static int lowpan_nhc_insert(struct lowpan_nhc *nhc)
return 0;
}
+static void vlan_stacked_transfer_operstate(const struct net_device *rootdev,
+ struct net_device *dev,
+ struct vlan_dev_priv *vlan)
+{
+ if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
+ netif_stacked_transfer_operstate(rootdev, dev);
+}
+
void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
/* Account for reference in struct vlan_dev_priv */
dev_hold(real_dev);
- netif_stacked_transfer_operstate(real_dev, dev);
+ vlan_stacked_transfer_operstate(real_dev, dev, vlan);
linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
/* So, got the sucker initialized, now lets place
case NETDEV_CHANGE:
/* Propagate real device state to vlan devices */
vlan_group_for_each_dev(grp, i, vlandev)
- netif_stacked_transfer_operstate(dev, vlandev);
+ vlan_stacked_transfer_operstate(dev, vlandev,
+ vlan_dev_priv(vlandev));
break;
case NETDEV_CHANGEADDR:
dev_close_many(&close_list, false);
list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
- netif_stacked_transfer_operstate(dev, vlandev);
+ vlan_stacked_transfer_operstate(dev, vlandev,
+ vlan_dev_priv(vlandev));
list_del_init(&vlandev->close_list);
}
list_del(&close_list);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
dev_change_flags(vlandev, flgs | IFF_UP,
extack);
- netif_stacked_transfer_operstate(dev, vlandev);
+ vlan_stacked_transfer_operstate(dev, vlandev, vlan);
}
break;
u32 old_flags = vlan->flags;
if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
- VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
+ VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
+ VLAN_FLAG_BRIDGE_BINDING))
return -EINVAL;
vlan->flags = (old_flags & ~mask) | (flags & mask);
if (vlan->flags & VLAN_FLAG_MVRP)
vlan_mvrp_request_join(dev);
- if (netif_carrier_ok(real_dev))
+ if (netif_carrier_ok(real_dev) &&
+ !(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
netif_carrier_on(dev);
return 0;
if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
dev_uc_del(real_dev, dev->dev_addr);
- netif_carrier_off(dev);
+ if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
+ netif_carrier_off(dev);
return 0;
}
static int vlan_dev_init(struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct net_device *real_dev = vlan->real_dev;
netif_carrier_off(dev);
(1<<__LINK_STATE_DORMANT))) |
(1<<__LINK_STATE_PRESENT);
+ if (vlan->flags & VLAN_FLAG_BRIDGE_BINDING)
+ dev->state |= (1 << __LINK_STATE_NOCARRIER);
+
dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
NETIF_F_GSO_ENCAP_ALL |
#endif
dev->needed_headroom = real_dev->needed_headroom;
- if (vlan_hw_offload_capable(real_dev->features,
- vlan_dev_priv(dev)->vlan_proto)) {
+ if (vlan_hw_offload_capable(real_dev->features, vlan->vlan_proto)) {
dev->header_ops = &vlan_passthru_header_ops;
dev->hard_header_len = real_dev->hard_header_len;
} else {
vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
- vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
- if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
+ vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
+ if (!vlan->vlan_pcpu_stats)
return -ENOMEM;
return 0;
flags = nla_data(data[IFLA_VLAN_FLAGS]);
if ((flags->flags & flags->mask) &
~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
- VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) {
+ VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
+ VLAN_FLAG_BRIDGE_BINDING)) {
NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN flags");
return -EINVAL;
}
rc = put_user(amount, (int __user *)argp);
break;
}
- case SIOCGSTAMP:
- rc = sock_get_timestamp(sk, argp);
- break;
- case SIOCGSTAMPNS:
- rc = sock_get_timestampns(sk, argp);
- break;
/* Routing */
case SIOCADDRT:
case SIOCDELRT:
.getname = atalk_getname,
.poll = datagram_poll,
.ioctl = atalk_ioctl,
+ .gettstamp = sock_gettstamp,
#ifdef CONFIG_COMPAT
.compat_ioctl = atalk_compat_ioctl,
#endif
return NETDEV_TX_OK;
}
rt = (struct rtable *) dst;
- if (rt->rt_gateway)
- daddr = &rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ daddr = &rt->rt_gw4;
else
daddr = &ip_hdr(skb)->daddr;
n = dst_neigh_lookup(dst, daddr);
(int __user *)argp) ? -EFAULT : 0;
goto done;
}
- case SIOCGSTAMP: /* borrowed from IP */
-#ifdef CONFIG_COMPAT
- if (compat)
- error = compat_sock_get_timestamp(sk, argp);
- else
-#endif
- error = sock_get_timestamp(sk, argp);
- goto done;
- case SIOCGSTAMPNS: /* borrowed from IP */
-#ifdef CONFIG_COMPAT
- if (compat)
- error = compat_sock_get_timestampns(sk, argp);
- else
-#endif
- error = sock_get_timestampns(sk, argp);
- goto done;
case ATM_SETSC:
net_warn_ratelimited("ATM_SETSC is obsolete; used by %s:%d\n",
current->comm, task_pid_nr(current));
static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
- if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
+ if (arg < 0 || arg >= MAX_LEC_ITF)
+ return -EINVAL;
+ arg = array_index_nospec(arg, MAX_LEC_ITF);
+ if (!dev_lec[arg])
return -EINVAL;
vcc->proto_data = dev_lec[arg];
return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
i = arg;
if (arg >= MAX_LEC_ITF)
return -EINVAL;
+ i = array_index_nospec(arg, MAX_LEC_ITF);
if (!dev_lec[i]) {
int size;
#ifdef CONFIG_COMPAT
.compat_ioctl = vcc_compat_ioctl,
#endif
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = pvc_shutdown,
.setsockopt = pvc_setsockopt,
#ifdef CONFIG_COMPAT
.compat_ioctl = svc_compat_ioctl,
#endif
+ .gettstamp = sock_gettstamp,
.listen = svc_listen,
.shutdown = svc_shutdown,
.setsockopt = svc_setsockopt,
break;
}
- case SIOCGSTAMP:
- res = sock_get_timestamp(sk, argp);
- break;
-
- case SIOCGSTAMPNS:
- res = sock_get_timestampns(sk, argp);
- break;
-
case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */
case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */
case SIOCAX25GETUID: {
* magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode
*/
- seq_printf(seq, "%8.8lx %s %s%s ",
- (long) ax25,
+ seq_printf(seq, "%p %s %s%s ",
+ ax25,
ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name,
ax2asc(buf, &ax25->source_addr),
ax25->iamdigi? "*":"");
.getname = ax25_getname,
.poll = datagram_poll,
.ioctl = ax25_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = ax25_listen,
.shutdown = ax25_shutdown,
.setsockopt = ax25_setsockopt,
err = put_user(amount, (int __user *) arg);
break;
- case SIOCGSTAMP:
- err = sock_get_timestamp(sk, (struct timeval __user *) arg);
- break;
-
- case SIOCGSTAMPNS:
- err = sock_get_timestampns(sk, (struct timespec __user *) arg);
- break;
-
default:
err = -ENOIOCTLCMD;
break;
!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
+ /* The minimum encryption key size needs to be enforced by the
+ * host stack before establishing any L2CAP connections. The
+ * specification in theory allows a minimum of 1, but to align
+ * BR/EDR and LE transports, a minimum of 7 is chosen.
+ */
+ if (conn->enc_key_size < HCI_MIN_ENC_KEY_SIZE)
+ return 0;
+
return 1;
}
hdev->set_bdaddr)
ret = hdev->set_bdaddr(hdev,
&hdev->public_addr);
- else
- ret = -EADDRNOTAVAIL;
}
setup_failed:
ev->data, ev->length);
}
- ptr += sizeof(*ev) + ev->length + 1;
+ ptr += sizeof(*ev) + ev->length;
}
hci_dev_unlock(hdev);
sockfd_put(csock);
return err;
}
+ ca.name[sizeof(ca.name)-1] = 0;
err = hidp_connection_add(&ca, csock, isock);
if (!err && copy_to_user(argp, &ca, sizeof(ca)))
}
EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults);
-static void l2cap_le_flowctl_init(struct l2cap_chan *chan)
+static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
{
chan->sdu = NULL;
chan->sdu_last_frag = NULL;
chan->sdu_len = 0;
- chan->tx_credits = 0;
+ chan->tx_credits = tx_credits;
/* Derive MPS from connection MTU to stop HCI fragmentation */
chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE);
/* Give enough credits for a full packet */
if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags))
return;
- l2cap_le_flowctl_init(chan);
+ l2cap_le_flowctl_init(chan, 0);
req.psm = chan->psm;
req.scid = cpu_to_le16(chan->scid);
chan->dcid = scid;
chan->omtu = mtu;
chan->remote_mps = mps;
- chan->tx_credits = __le16_to_cpu(req->credits);
__l2cap_chan_add(conn, chan);
- l2cap_le_flowctl_init(chan);
+ l2cap_le_flowctl_init(chan, __le16_to_cpu(req->credits));
dcid = chan->scid;
credits = chan->rx_credits;
conn = chan->conn;
- /*change security for LE channels */
+ /* change security for LE channels */
if (chan->scid == L2CAP_CID_ATT) {
- if (smp_conn_security(conn->hcon, sec.level))
+ if (smp_conn_security(conn->hcon, sec.level)) {
+ err = -EINVAL;
break;
+ }
+
set_bit(FLAG_PENDING_SECURITY, &chan->flags);
sk->sk_state = BT_CONFIG;
chan->state = BT_CONFIG;
.recvmsg = l2cap_sock_recvmsg,
.poll = bt_sock_poll,
.ioctl = bt_sock_ioctl,
+ .gettstamp = sock_gettstamp,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
.shutdown = l2cap_sock_shutdown,
MGMT_STATUS_INVALID_PARAMS);
}
- expected_len = sizeof(*cp) + key_count *
- sizeof(struct mgmt_link_key_info);
+ expected_len = struct_size(cp, keys, key_count);
if (expected_len != len) {
bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
expected_len, len);
MGMT_STATUS_INVALID_PARAMS);
}
- expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
+ expected_len = struct_size(cp, irks, irk_count);
if (expected_len != len) {
bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
expected_len, len);
MGMT_STATUS_INVALID_PARAMS);
}
- expected_len = sizeof(*cp) + key_count *
- sizeof(struct mgmt_ltk_info);
+ expected_len = struct_size(cp, keys, key_count);
if (expected_len != len) {
bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
expected_len, len);
MGMT_STATUS_INVALID_PARAMS);
}
- expected_len = sizeof(*cp) + param_count *
- sizeof(struct mgmt_conn_param);
+ expected_len = struct_size(cp, params, param_count);
if (expected_len != len) {
bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
expected_len, len);
.setsockopt = rfcomm_sock_setsockopt,
.getsockopt = rfcomm_sock_getsockopt,
.ioctl = rfcomm_sock_ioctl,
+ .gettstamp = sock_gettstamp,
.poll = bt_sock_poll,
.socketpair = sock_no_socketpair,
.mmap = sock_no_mmap
struct sock *sk = sock->sk;
int err = 0;
- BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
-
if (!addr || addr_len < sizeof(struct sockaddr_sco) ||
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
.recvmsg = sco_sock_recvmsg,
.poll = bt_sock_poll,
.ioctl = bt_sock_ioctl,
+ .gettstamp = sock_gettstamp,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
.shutdown = sco_sock_shutdown,
-obj-y := test_run.o
+obj-$(CONFIG_BPF_SYSCALL) := test_run.o
return data;
}
+static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
+{
+ void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
+ void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
+ u32 size = kattr->test.ctx_size_in;
+ void *data;
+ int err;
+
+ if (!data_in && !data_out)
+ return NULL;
+ /* A context was requested: allocate max_size zeroed bytes, so whatever
+ * userspace does not supply below stays zero.
+ */
+ data = kzalloc(max_size, GFP_USER);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+ /* Input is optional: with only ctx_out set, the zeroed buffer is
+ * returned as is.
+ */
+ if (data_in) {
+ err = bpf_check_uarg_tail_zero(data_in, max_size, size);
+ if (err) {
+ kfree(data);
+ return ERR_PTR(err);
+ }
+ /* Never copy more than the kernel-side context size. */
+ size = min_t(u32, max_size, size);
+ if (copy_from_user(data, data_in, size)) {
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ return data;
+}
+
+static int bpf_ctx_finish(const union bpf_attr *kattr,
+ union bpf_attr __user *uattr, const void *data,
+ u32 size)
+{
+ void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
+ int err = -EFAULT;
+ u32 copy_size = size;
+
+ if (!data || !data_out)
+ return 0;
+ /* User buffer smaller than the context: copy only what fits and
+ * remember -ENOSPC as the final result.
+ */
+ if (copy_size > kattr->test.ctx_size_out) {
+ copy_size = kattr->test.ctx_size_out;
+ err = -ENOSPC;
+ }
+ /* The untruncated size is reported through ctx_size_out even when the
+ * data was cut short.
+ * NOTE(review): if a copy_to_user() below fails after truncation, the
+ * function returns -ENOSPC rather than -EFAULT - confirm this is
+ * intended.
+ */
+ if (copy_to_user(data_out, data, copy_size))
+ goto out;
+ if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
+ goto out;
+ if (err != -ENOSPC)
+ err = 0;
+out:
+ return err;
+}
+
+/**
+ * range_is_zero - test whether a range of a buffer contains only zero bytes
+ * @buf: buffer to check
+ * @from: check from this position
+ * @to: check up until (excluding) this position
+ *
+ * Return: true if every byte of @buf in the range [from,to) is zero,
+ * false if at least one byte in that range is non-zero.
+ */
+static inline bool range_is_zero(void *buf, size_t from, size_t to)
+{
+ return !memchr_inv((u8 *)buf + from, 0, to - from);
+}
+
+static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
+{
+ struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+
+ if (!__skb)
+ return 0;
+
+ /* Only priority and cb may be supplied; every other byte of the
+ * context has to be zero, otherwise the context is rejected with
+ * -EINVAL. Start with everything in front of priority.
+ */
+ if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
+ return -EINVAL;
+
+ /* priority is allowed; check the gap between priority and cb */
+
+ if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
+ FIELD_SIZEOF(struct __sk_buff, priority),
+ offsetof(struct __sk_buff, cb)))
+ return -EINVAL;
+
+ /* cb is allowed; check everything after cb up to the end of the struct */
+
+ if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
+ FIELD_SIZEOF(struct __sk_buff, cb),
+ sizeof(struct __sk_buff)))
+ return -EINVAL;
+ /* Validation passed: apply the two permitted fields to the real skb. */
+ skb->priority = __skb->priority;
+ memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
+
+ return 0;
+}
+
+static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
+{
+ struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+
+ if (!__skb)
+ return;
+ /* Only priority and cb are written back; all other fields of the
+ * context are left as they are.
+ */
+ __skb->priority = skb->priority;
+ memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
+}
+
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
bool is_l2 = false, is_direct_pkt_access = false;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
+ struct __sk_buff *ctx = NULL;
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
if (IS_ERR(data))
return PTR_ERR(data);
+ ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
+ if (IS_ERR(ctx)) {
+ kfree(data);
+ return PTR_ERR(ctx);
+ }
+
switch (prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
sk = kzalloc(sizeof(struct sock), GFP_USER);
if (!sk) {
kfree(data);
+ kfree(ctx);
return -ENOMEM;
}
sock_net_set(sk, current->nsproxy->net_ns);
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
+ kfree(ctx);
kfree(sk);
return -ENOMEM;
}
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
+ ret = convert___skb_to_skb(skb, ctx);
+ if (ret)
+ goto out;
ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
- if (ret) {
- kfree_skb(skb);
- kfree(sk);
- return ret;
- }
+ if (ret)
+ goto out;
if (!is_l2) {
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
- kfree_skb(skb);
- kfree(sk);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
}
memset(__skb_push(skb, hh_len), 0, hh_len);
}
+ convert_skb_to___skb(skb, ctx);
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, ctx,
+ sizeof(struct __sk_buff));
+out:
kfree_skb(skb);
kfree(sk);
+ kfree(ctx);
return ret;
}
void *data;
int ret;
+ if (kattr->test.ctx_in || kattr->test.ctx_out)
+ return -EINVAL;
+
data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
if (IS_ERR(data))
return PTR_ERR(data);
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
+ if (kattr->test.ctx_in || kattr->test.ctx_out)
+ return -EINVAL;
+
data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
bool changed_addr;
int err;
- /* register of bridge completed, add sysfs entries */
- if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
- br_sysfs_addbr(dev);
- return NOTIFY_DONE;
+ if (dev->priv_flags & IFF_EBRIDGE) {
+ if (event == NETDEV_REGISTER) {
+ /* register of bridge completed, add sysfs entries */
+ br_sysfs_addbr(dev);
+ return NOTIFY_DONE;
+ }
+ br_vlan_bridge_event(dev, event, ptr);
}
/* not a port of a bridge */
break;
}
+ if (event != NETDEV_UNREGISTER)
+ br_vlan_port_event(p, event);
+
/* Events that may cause spanning tree to refresh */
if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
event == NETDEV_CHANGE || event == NETDEV_DOWN))
u8 *arpptr, *sha;
__be32 sip, tip;
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;
if ((dev->flags & IFF_NOARP) ||
!pskb_may_pull(skb, arp_hdr_len(dev)))
return;
if (ipv4_is_zeronet(sip) || sip == tip) {
/* prevent flooding to neigh suppress ports */
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
return;
}
}
/* its our local ip, so don't proxy reply
* and don't forward to neigh suppress ports
*/
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
return;
}
*/
if (replied ||
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED))
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
}
neigh_release(n);
struct ipv6hdr *iphdr;
struct neighbour *n;
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;
if (p && (p->flags & BR_NEIGH_SUPPRESS))
return;
if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
!msg->icmph.icmp6_solicited) {
/* prevent flooding to neigh suppress ports */
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
return;
}
if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
/* prevent flooding to neigh suppress ports */
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
return;
}
/* its our own ip, so don't proxy reply
* and don't forward to arp suppress ports
*/
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
return;
}
*/
if (replied ||
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED))
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
}
neigh_release(n);
}
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/netfilter_bridge.h>
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+#include <net/netfilter/nf_queue.h>
+#endif
#include <linux/neighbour.h>
#include <net/arp.h>
#include <linux/export.h>
#include "br_private.h"
#include "br_private_tunnel.h"
-/* Hook for brouter */
-br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
-EXPORT_SYMBOL(br_should_route_hook);
-
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_bridge_port *p = br_port_get_rcu(skb->dev);
-
__br_handle_local_finish(skb);
- BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
- br_pass_frame_up(skb);
- return 0;
+ /* return 1 to signal the okfn() was called so it's ok to use the skb */
+ return 1;
+}
+
+static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb)
+{
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ struct nf_hook_entries *e = NULL;
+ struct nf_hook_state state;
+ unsigned int verdict, i;
+ struct net *net;
+ int ret;
+
+ net = dev_net(skb->dev);
+#ifdef HAVE_JUMP_LABEL
+ if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING]))
+ goto frame_finish;
+#endif
+ /* Without registered PRE_ROUTING hook entries the frame goes straight
+ * to br_handle_frame_finish().
+ */
+ e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]);
+ if (!e)
+ goto frame_finish;
+
+ nf_hook_state_init(&state, NF_BR_PRE_ROUTING,
+ NFPROTO_BRIDGE, skb->dev, NULL, NULL,
+ net, br_handle_frame_finish);
+ /* Run the hooks one by one. An ACCEPT with the br_netfilter_broute
+ * flag set hands the skb back to the caller through *pskb with
+ * RX_HANDLER_PASS instead of bridging it. DROP, a queued skb and any
+ * other verdict end processing here with RX_HANDLER_CONSUMED.
+ */
+ for (i = 0; i < e->num_hook_entries; i++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state);
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) {
+ *pskb = skb;
+ return RX_HANDLER_PASS;
+ }
+ break;
+ case NF_DROP:
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ case NF_QUEUE:
+ ret = nf_queue(skb, &state, e, i, verdict);
+ if (ret == 1)
+ continue;
+ return RX_HANDLER_CONSUMED;
+ default: /* STOLEN */
+ return RX_HANDLER_CONSUMED;
+ }
+ }
+frame_finish:
+ net = dev_net(skb->dev);
+ br_handle_frame_finish(net, NULL, skb);
+#else
+ br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
+#endif
+ return RX_HANDLER_CONSUMED;
}
/*
struct net_bridge_port *p;
struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest;
- br_should_route_hook_t *rhook;
if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
return RX_HANDLER_PASS;
if (!skb)
return RX_HANDLER_CONSUMED;
+ memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+
p = br_port_get_rcu(skb->dev);
if (p->flags & BR_VLAN_TUNNEL) {
if (br_handle_ingress_vlan_tunnel(skb, p,
goto forward;
}
- /* Deliver packet to local host only */
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
- NULL, skb, skb->dev, NULL, br_handle_local_finish);
- return RX_HANDLER_CONSUMED;
+ /* The else clause should be hit when nf_hook():
+ * - returns < 0 (drop/error)
+ * - returns = 0 (stolen/nf_queue)
+ * Thus return 1 from the okfn() to signal the skb is ok to pass
+ */
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ br_handle_local_finish) == 1) {
+ return RX_HANDLER_PASS;
+ } else {
+ return RX_HANDLER_CONSUMED;
+ }
}
forward:
switch (p->state) {
case BR_STATE_FORWARDING:
- rhook = rcu_dereference(br_should_route_hook);
- if (rhook) {
- if ((*rhook)(skb)) {
- *pskb = skb;
- return RX_HANDLER_PASS;
- }
- dest = eth_hdr(skb)->h_dest;
- }
- /* fall through */
case BR_STATE_LEARNING:
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- br_handle_frame_finish);
- break;
+ return nf_hook_bridge_pre(skb, pskb);
default:
drop:
kfree_skb(skb);
__br_multicast_open(br, query);
- list_for_each_entry(port, &br->port_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(port, &br->port_list, list) {
if (port->state == BR_STATE_DISABLED ||
port->state == BR_STATE_BLOCKING)
continue;
br_multicast_enable(&port->ip6_own_query);
#endif
}
+ rcu_read_unlock();
}
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED,
br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) ||
nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT,
- br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT)))
+ br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)))
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
BROPT_MTU_SET_BY_USER,
BROPT_VLAN_STATS_PER_PORT,
BROPT_NO_LL_LEARN,
+ BROPT_VLAN_BRIDGE_BINDING,
};
struct net_bridge {
struct net_device *brdev;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- int igmp;
- int mrouters_only;
+ u8 igmp;
+ u8 mrouters_only:1;
#endif
-
- bool proxyarp_replied;
- bool src_port_isolated;
-
+ u8 proxyarp_replied:1;
+ u8 src_port_isolated:1;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
- bool vlan_filtered;
+ u8 vlan_filtered:1;
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ u8 br_netfilter_broute:1;
#endif
#ifdef CONFIG_NET_SWITCHDEV
int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
void br_vlan_get_stats(const struct net_bridge_vlan *v,
struct br_vlan_stats *stats);
+void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
+void br_vlan_bridge_event(struct net_device *dev, unsigned long event,
+ void *ptr);
static inline struct net_bridge_vlan_group *br_vlan_group(
const struct net_bridge *br)
struct br_vlan_stats *stats)
{
}
+
+static inline void br_vlan_port_event(struct net_bridge_port *p,
+ unsigned long event)
+{
+}
+
+static inline void br_vlan_bridge_event(struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+}
#endif
struct nf_br_ops {
#include "br_private.h"
#include "br_private_tunnel.h"
+static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid);
+
static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
__vlan_add_list(v);
__vlan_add_flags(v, flags);
+
+ if (p)
+ nbp_vlan_set_vlan_dev_state(p, v->vid);
out:
return err;
rhashtable_remove_fast(&vg->vlan_hash, &v->vnode,
br_vlan_rht_params);
__vlan_del_list(v);
+ nbp_vlan_set_vlan_dev_state(p, v->vid);
call_rcu(&v->rcu, nbp_vlan_rcu_free);
}
return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_info);
+
+static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
+{
+ return is_vlan_dev(dev) && /* vlan_dev_priv() is only evaluated for VLAN devices */
+ !!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING);
+}
+
+static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
+ __always_unused void *data)
+{
+ return br_vlan_is_bind_vlan_dev(dev); /* walk-callback adapter; @data is ignored */
+}
+
+static bool br_vlan_has_upper_bind_vlan_dev(struct net_device *dev)
+{
+ int found;
+
+ rcu_read_lock();
+ found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn,
+ NULL);
+ rcu_read_unlock();
+ /* A non-zero walk result is taken to mean that some upper device of
+ * @dev is a VLAN device with VLAN_FLAG_BRIDGE_BINDING set.
+ */
+ return !!found;
+}
+
+struct br_vlan_bind_walk_data {
+ u16 vid; /* VLAN id the walk is looking for */
+ struct net_device *result; /* matching upper device, set by the walk callback */
+};
+
+static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev,
+ void *data_in)
+{
+ struct br_vlan_bind_walk_data *data = data_in;
+ int found = 0;
+
+ if (br_vlan_is_bind_vlan_dev(dev) &&
+ vlan_dev_priv(dev)->vlan_id == data->vid) {
+ data->result = dev;
+ found = 1;
+ }
+ /* 1 = match recorded in data->result, 0 = not the device looked for. */
+ return found;
+}
+
+static struct net_device *
+br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
+{
+ struct br_vlan_bind_walk_data data = {
+ .vid = vid,
+ };
+
+ rcu_read_lock();
+ netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
+ &data);
+ rcu_read_unlock();
+ /* .result is implicitly zero-initialised above, so NULL is returned
+ * when the callback never recorded a match.
+ * NOTE(review): the pointer is handed out after rcu_read_unlock();
+ * presumably safe because the callers run under RTNL - confirm.
+ */
+ return data.result;
+}
+
+static bool br_vlan_is_dev_up(const struct net_device *dev)
+{
+ return !!(dev->flags & IFF_UP) && netif_oper_up(dev); /* IFF_UP set and netif_oper_up() true */
+}
+
+static void br_vlan_set_vlan_dev_state(const struct net_bridge *br,
+ struct net_device *vlan_dev)
+{
+ u16 vid = vlan_dev_priv(vlan_dev)->vlan_id;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
+ bool has_carrier = false;
+
+ if (!netif_carrier_ok(br->dev)) { /* bridge without carrier: VLAN device follows */
+ netif_carrier_off(vlan_dev);
+ return;
+ }
+ /* Otherwise the carrier is on if and only if at least one port that
+ * has this vid in its VLAN group is up.
+ */
+ list_for_each_entry(p, &br->port_list, list) {
+ vg = nbp_vlan_group(p);
+ if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) {
+ has_carrier = true;
+ break;
+ }
+ }
+
+ if (has_carrier)
+ netif_carrier_on(vlan_dev);
+ else
+ netif_carrier_off(vlan_dev);
+}
+
+static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p)
+{
+ struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+ struct net_bridge_vlan *vlan;
+ struct net_device *vlan_dev;
+
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev,
+ vlan->vid);
+ if (vlan_dev) {
+ if (br_vlan_is_dev_up(p->dev)) { /* port is up: carrier can only be turned on */
+ if (netif_carrier_ok(p->br->dev))
+ netif_carrier_on(vlan_dev);
+ } else { /* port is not up: recompute from all ports of the bridge */
+ br_vlan_set_vlan_dev_state(p->br, vlan_dev);
+ }
+ }
+ }
+}
+
+static void br_vlan_upper_change(struct net_device *dev,
+ struct net_device *upper_dev,
+ bool linking)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ if (!br_vlan_is_bind_vlan_dev(upper_dev))
+ return;
+ /* Linking a binding VLAN device always sets the bridge option (after
+ * syncing the new device's carrier); unlinking keeps the option only
+ * while another binding VLAN upper device is still found.
+ */
+ if (linking) {
+ br_vlan_set_vlan_dev_state(br, upper_dev);
+ br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true);
+ } else {
+ br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING,
+ br_vlan_has_upper_bind_vlan_dev(dev));
+ }
+}
+
+struct br_vlan_link_state_walk_data {
+ struct net_bridge *br; /* bridge passed on to br_vlan_set_vlan_dev_state() */
+};
+
+static int br_vlan_link_state_change_fn(struct net_device *vlan_dev,
+ void *data_in)
+{
+ struct br_vlan_link_state_walk_data *data = data_in;
+
+ if (br_vlan_is_bind_vlan_dev(vlan_dev))
+ br_vlan_set_vlan_dev_state(data->br, vlan_dev);
+ /* Always 0 - presumably so that the walk continues over all upper
+ * devices (confirm against netdev_walk_all_upper_dev_rcu()).
+ */
+ return 0;
+}
+
+static void br_vlan_link_state_change(struct net_device *dev,
+ struct net_bridge *br)
+{
+ struct br_vlan_link_state_walk_data data = {
+ .br = br
+ };
+
+ rcu_read_lock();
+ netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn,
+ &data); /* re-evaluates each binding VLAN device found above @dev */
+ rcu_read_unlock();
+}
+
+/* Re-evaluate the carrier of the binding VLAN device for @vid that is
+ * stacked on the bridge of port @p. Does nothing unless the bridge has
+ * BROPT_VLAN_BRIDGE_BINDING set and such a VLAN device exists.
+ * Must be protected by RTNL.
+ */
+static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid)
+{
+ struct net_device *vlan_dev;
+
+ if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING))
+ return;
+
+ vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, vid);
+ if (vlan_dev)
+ br_vlan_set_vlan_dev_state(p->br, vlan_dev);
+}
+
+/* Handle a netdevice event for the bridge device @dev.
+ * NETDEV_CHANGEUPPER: @ptr is a struct netdev_notifier_changeupper_info;
+ * update the bridge binding option for the (un)linked upper device.
+ * NETDEV_CHANGE / NETDEV_UP: if BROPT_VLAN_BRIDGE_BINDING is set,
+ * re-evaluate the carrier of all binding VLAN upper devices.
+ * Any other event is ignored.
+ * Must be protected by RTNL.
+ */
+void br_vlan_bridge_event(struct net_device *dev, unsigned long event,
+ void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info;
+ struct net_bridge *br;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ info = ptr;
+ br_vlan_upper_change(dev, info->upper_dev, info->linking);
+ break;
+
+ case NETDEV_CHANGE:
+ case NETDEV_UP:
+ br = netdev_priv(dev);
+ if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING))
+ return;
+ br_vlan_link_state_change(dev, br);
+ break;
+ }
+}
+
+/* Handle a netdevice event for bridge port @p. On NETDEV_CHANGE,
+ * NETDEV_DOWN and NETDEV_UP the carrier of the binding VLAN devices is
+ * re-evaluated for every VLAN configured on the port; other events are
+ * ignored. Does nothing unless the bridge has BROPT_VLAN_BRIDGE_BINDING
+ * set.
+ * Must be protected by RTNL.
+ */
+void br_vlan_port_event(struct net_bridge_port *p, unsigned long event)
+{
+ if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING))
+ return;
+
+ switch (event) {
+ case NETDEV_CHANGE:
+ case NETDEV_DOWN:
+ case NETDEV_UP:
+ br_vlan_set_all_vlan_dev_state(p);
+ break;
+ }
+}
#include <linux/module.h>
#include <linux/if_bridge.h>
+#include "../br_private.h"
+
/* EBT_ACCEPT means the frame will be bridged
* EBT_DROP means the frame will be routed
*/
.me = THIS_MODULE,
};
-static int ebt_broute(struct sk_buff *skb)
+static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *s)
{
+ struct net_bridge_port *p = br_port_get_rcu(skb->dev);
struct nf_hook_state state;
+ unsigned char *dest;
int ret;
+ if (!p || p->state != BR_STATE_FORWARDING)
+ return NF_ACCEPT;
+
nf_hook_state_init(&state, NF_BR_BROUTING,
- NFPROTO_BRIDGE, skb->dev, NULL, NULL,
- dev_net(skb->dev), NULL);
+ NFPROTO_BRIDGE, s->in, NULL, NULL,
+ s->net, NULL);
ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
- if (ret == NF_DROP)
- return 1; /* route it */
- return 0; /* bridge it */
+
+ if (ret != NF_DROP)
+ return ret;
+
+ /* DROP in ebtables -t broute means that the
+ * skb should be routed, not bridged.
+ * This is awkward, but can't be changed for compatibility
+ * reasons.
+ *
+ * We map DROP to ACCEPT and set the ->br_netfilter_broute flag.
+ */
+ BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1;
+
+ /* undo PACKET_HOST mangling done in br_input in case the dst
+ * address matches the logical bridge but not the port.
+ */
+ dest = eth_hdr(skb)->h_dest;
+ if (skb->pkt_type == PACKET_HOST &&
+ !ether_addr_equal(skb->dev->dev_addr, dest) &&
+ ether_addr_equal(p->br->dev->dev_addr, dest))
+ skb->pkt_type = PACKET_OTHERHOST;
+
+ return NF_ACCEPT;
}
+static const struct nf_hook_ops ebt_ops_broute = {
+ .hook = ebt_broute,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_PRE_ROUTING,
+ .priority = NF_BR_PRI_FIRST,
+};
+
static int __net_init broute_net_init(struct net *net)
{
- return ebt_register_table(net, &broute_table, NULL,
+ return ebt_register_table(net, &broute_table, &ebt_ops_broute,
&net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.broute_table, NULL);
+ ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute);
}
static struct pernet_operations broute_net_ops = {
static int __init ebtable_broute_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&broute_net_ops);
- if (ret < 0)
- return ret;
- /* see br_input.c */
- RCU_INIT_POINTER(br_should_route_hook,
- (br_should_route_hook_t *)ebt_broute);
- return 0;
+ return register_pernet_subsys(&broute_net_ops);
}
static void __exit ebtable_broute_fini(void)
{
- RCU_INIT_POINTER(br_should_route_hook, NULL);
- synchronize_net();
unregister_pernet_subsys(&broute_net_ops);
}
mutex_unlock(&ebt_mutex);
WRITE_ONCE(*res, table);
-
- if (!ops)
- return 0;
-
ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
if (ret) {
__ebt_unregister_table(net, table);
void ebt_unregister_table(struct net *net, struct ebt_table *table,
const struct nf_hook_ops *ops)
{
- if (ops)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
__ebt_unregister_table(net, table);
}
if (match_kern)
match_kern->match_size = ret;
- if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+ /* rule should have no remaining data after target */
+ if (type == EBT_COMPAT_TARGET && size_left)
return -EINVAL;
match32 = (struct compat_ebt_entry_mwt *) buf;
goto noxoff;
if (likely(!netif_queue_stopped(caifd->netdev))) {
+ struct Qdisc *sch;
+
/* If we run with a TX queue, check if the queue is too long*/
txq = netdev_get_tx_queue(skb->dev, 0);
- qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc));
-
- if (likely(qlen == 0))
+ sch = rcu_dereference_bh(txq->qdisc);
+ if (likely(qdisc_is_empty(sch)))
goto noxoff;
+ /* The explicit qdisc length can only be checked when the
+ * qdisc is not TCQ_F_NOLOCK; otherwise always set flow off.
+ */
high = (caifd->netdev->tx_queue_len * q_high) / 100;
- if (likely(qlen < high))
+ if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high))
goto noxoff;
}
int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
- struct sock *sk = sock->sk;
-
switch (cmd) {
-
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
default:
return -ENOIOCTLCMD;
}
.getname = sock_no_getname,
.poll = datagram_poll,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
.getname = raw_getname,
.poll = datagram_poll,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = raw_setsockopt,
return __compat_sys_setsockopt(fd, level, optname, optval, optlen);
}
-int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
-{
- struct compat_timeval __user *ctv;
- int err;
- struct timeval tv;
-
- if (COMPAT_USE_64BIT_TIME)
- return sock_get_timestamp(sk, userstamp);
-
- ctv = (struct compat_timeval __user *) userstamp;
- err = -ENOENT;
- sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sock_read_timestamp(sk));
-
- if (tv.tv_sec == -1)
- return err;
- if (tv.tv_sec == 0) {
- ktime_t kt = ktime_get_real();
- sock_write_timestamp(sk, kt);
- tv = ktime_to_timeval(kt);
- }
- err = 0;
- if (put_user(tv.tv_sec, &ctv->tv_sec) ||
- put_user(tv.tv_usec, &ctv->tv_usec))
- err = -EFAULT;
- return err;
-}
-EXPORT_SYMBOL(compat_sock_get_timestamp);
-
-int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
- struct compat_timespec __user *ctv;
- int err;
- struct timespec ts;
-
- if (COMPAT_USE_64BIT_TIME)
- return sock_get_timestampns (sk, userstamp);
-
- ctv = (struct compat_timespec __user *) userstamp;
- err = -ENOENT;
- sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sock_read_timestamp(sk));
- if (ts.tv_sec == -1)
- return err;
- if (ts.tv_sec == 0) {
- ktime_t kt = ktime_get_real();
- sock_write_timestamp(sk, kt);
- ts = ktime_to_timespec(kt);
- }
- err = 0;
- if (put_user(ts.tv_sec, &ctv->tv_sec) ||
- put_user(ts.tv_nsec, &ctv->tv_nsec))
- err = -EFAULT;
- return err;
-}
-EXPORT_SYMBOL(compat_sock_get_timestampns);
-
static int __compat_sys_getsockopt(int fd, int level, int optname,
char __user *optval,
int __user *optlen)
unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err,
+ int *off, int *err,
struct sk_buff **last)
{
bool peek_at_off = false;
return NULL;
}
}
- *peeked = 1;
refcount_inc(&skb->users);
} else {
__skb_unlink(skb, queue);
* @sk: socket
* @flags: MSG\_ flags
* @destructor: invoked under the receive lock on successful dequeue
- * @peeked: returns non-zero if this packet has been seen before
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
* @err: error code returned
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err,
+ int *off, int *err,
struct sk_buff **last)
{
struct sk_buff_head *queue = &sk->sk_receive_queue;
if (error)
goto no_packet;
- *peeked = 0;
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
*/
spin_lock_irqsave(&queue->lock, cpu_flags);
skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
- peeked, off, &error, last);
+ off, &error, last);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
if (error)
goto no_packet;
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err)
+ int *off, int *err)
{
struct sk_buff *skb, *last;
long timeo;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
- skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
- off, err, &last);
+ skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
+ &last);
if (skb)
return skb;
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
int noblock, int *err)
{
- int peeked, off = 0;
+ int off = 0;
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
- NULL, &peeked, &off, err);
+ NULL, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- if (dev->flags & IFF_UP)
+
+ /* Some auto-enslaved devices, e.g. failover slaves, are
+ * special: userspace might rename the device after the
+ * kernel has already initiated auto-enslavement and the
+ * interface has been brought up and is running. Allow a
+ * live name change even while these slave devices are
+ * up and running.
+ *
+ * Typically, users of these auto-enslaving devices
+ * don't actually care about a slave name change, as
+ * they are supposed to operate on the master interface
+ * directly.
+ */
+ if (dev->flags & IFF_UP &&
+ likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
return -EBUSY;
write_seqcount_begin(&devnet_rename_seq);
static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type)
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
{
const struct devlink_ops *ops = devlink->ops;
if (ops->sb_pool_set)
return ops->sb_pool_set(devlink, sb_index, pool_index,
- size, threshold_type);
+ size, threshold_type, extack);
return -EOPNOTSUPP;
}
size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
return devlink_sb_pool_set(devlink, devlink_sb->index,
- pool_index, size, threshold_type);
+ pool_index, size, threshold_type,
+ info->extack);
}
static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold)
+ u32 threshold,
+ struct netlink_ext_ack *extack)
{
const struct devlink_ops *ops = devlink_port->devlink->ops;
if (ops->sb_port_pool_set)
return ops->sb_port_pool_set(devlink_port, sb_index,
- pool_index, threshold);
+ pool_index, threshold, extack);
return -EOPNOTSUPP;
}
threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
- pool_index, threshold);
+ pool_index, threshold, info->extack);
}
static int
static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold)
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
{
const struct devlink_ops *ops = devlink_port->devlink->ops;
if (ops->sb_tc_pool_bind_set)
return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
tc_index, pool_type,
- pool_index, threshold);
+ pool_index, threshold, extack);
return -EOPNOTSUPP;
}
threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
tc_index, pool_type,
- pool_index, threshold);
+ pool_index, threshold, info->extack);
}
static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
goto err_upper_link;
}
- slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
if (fops && fops->slave_register &&
!fops->slave_register(slave_dev, failover_dev))
return NOTIFY_OK;
netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
err_upper_link:
netdev_rx_handler_unregister(slave_dev);
done:
netdev_rx_handler_unregister(slave_dev);
netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
if (fops && fops->slave_unregister &&
!fops->slave_unregister(slave_dev, failover_dev))
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
- BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_ENCAP_L2( \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK))
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
+ u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
u16 mac_len = 0, inner_net = 0, inner_trans = 0;
unsigned int gso_type = SKB_GSO_DODGY;
mac_len = skb->network_header - skb->mac_header;
inner_net = skb->network_header;
+ if (inner_mac_len > len_diff)
+ return -EINVAL;
inner_trans = skb->transport_header;
}
return ret;
if (encap) {
- /* inner mac == inner_net on l3 encap */
- skb->inner_mac_header = inner_net;
+ skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
skb_set_inner_protocol(skb, skb->protocol);
gso_type |= SKB_GSO_GRE;
else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
gso_type |= SKB_GSO_IPXIP6;
- else
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
gso_type |= SKB_GSO_IPXIP4;
if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
{
int ret;
+ if (flags & ~BPF_F_ADJ_ROOM_FIXED_GSO)
+ return -EINVAL;
+
if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
/* udp gso_size delineates datagrams, only allow if fixed */
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
return -EINVAL;
- if (val)
- tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
}
* Only binding to IP is supported.
*/
err = -EINVAL;
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return err;
if (addr->sa_family == AF_INET) {
if (addr_len < sizeof(struct sockaddr_in))
return err;
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nhc->nhc_dev;
- if (nhc->nhc_has_gw)
- params->ipv4_dst = nhc->nhc_gw.ipv4;
params->rt_metric = res.fi->fib_priority;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
- neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
+ if (likely(nhc->nhc_gw_family != AF_INET6)) {
+ if (nhc->nhc_gw_family)
+ params->ipv4_dst = nhc->nhc_gw.ipv4;
+
+ neigh = __ipv4_neigh_lookup_noref(dev,
+ (__force u32)params->ipv4_dst);
+ } else {
+ struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
+
+ params->family = AF_INET6;
+ *dst = nhc->nhc_gw.ipv6;
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+ }
+
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
{
struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
+ struct fib6_result res = {};
struct neighbour *neigh;
struct net_device *dev;
struct inet6_dev *idev;
- struct fib6_info *f6i;
struct flowi6 fl6;
int strict = 0;
- int oif;
+ int oif, err;
u32 mtu;
/* link local addresses are never forwarded */
if (unlikely(!tb))
return BPF_FIB_LKUP_RET_NOT_FWDED;
- f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+ err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
+ strict);
} else {
fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+ err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
}
- if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+ if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
+ res.f6i == net->ipv6.fib6_null_entry))
return BPF_FIB_LKUP_RET_NOT_FWDED;
- if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
- switch (f6i->fib6_type) {
- case RTN_BLACKHOLE:
- return BPF_FIB_LKUP_RET_BLACKHOLE;
- case RTN_UNREACHABLE:
- return BPF_FIB_LKUP_RET_UNREACHABLE;
- case RTN_PROHIBIT:
- return BPF_FIB_LKUP_RET_PROHIBIT;
- default:
- return BPF_FIB_LKUP_RET_NOT_FWDED;
- }
- }
-
- if (f6i->fib6_type != RTN_UNICAST)
+ switch (res.fib6_type) {
+ /* only unicast is forwarded */
+ case RTN_UNICAST:
+ break;
+ case RTN_BLACKHOLE:
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ case RTN_UNREACHABLE:
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ case RTN_PROHIBIT:
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+ default:
return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
- if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
- f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
- fl6.flowi6_oif, NULL,
- strict);
+ ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
+ fl6.flowi6_oif != 0, NULL, strict);
if (check_mtu) {
- mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
+ mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
if (params->tot_len > mtu)
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
- if (f6i->fib6_nh.fib_nh_lws)
+ if (res.nh->fib_nh_lws)
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
- if (f6i->fib6_nh.fib_nh_has_gw)
- *dst = f6i->fib6_nh.fib_nh_gw6;
+ if (res.nh->fib_nh_gw_family)
+ *dst = res.nh->fib_nh_gw6;
- dev = f6i->fib6_nh.fib_nh_dev;
- params->rt_metric = f6i->fib6_metric;
+ dev = res.nh->fib_nh_dev;
+ params->rt_metric = res.f6i->fib6_metric;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
- * not needed here. Can not use __ipv6_neigh_lookup_noref here
- * because we need to get nd_tbl via the stub
+ * not needed here.
*/
- neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
- ndisc_hashfn, dst, dev);
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
for_each_possible_cpu(i) {
const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
+ qstats->qlen = 0;
qstats->backlog += qcpu->backlog;
qstats->drops += qcpu->drops;
qstats->requeues += qcpu->requeues;
if (cpu) {
__gnet_stats_copy_queue_cpu(qstats, cpu);
} else {
+ qstats->qlen = q->qlen;
qstats->backlog = q->backlog;
qstats->drops = q->drops;
qstats->requeues = q->requeues;
#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#ifdef CONFIG_MODULES
}
EXPORT_SYMBOL_GPL(lwtstate_free);
-int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
+ int encap_attr, int encap_type_attr)
{
const struct lwtunnel_encap_ops *ops;
struct nlattr *nest;
lwtstate->type > LWTUNNEL_ENCAP_MAX)
return 0;
- nest = nla_nest_start(skb, RTA_ENCAP);
+ nest = nla_nest_start(skb, encap_attr);
if (!nest)
return -EMSGSIZE;
if (ret)
goto nla_put_failure;
nla_nest_end(skb, nest);
- ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
+ ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
if (ret)
goto nla_put_failure;
goto out;
}
+ if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
+ err = -EINVAL;
+ goto out;
+ }
+
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
bool exempt_from_gc;
error = device_add(dev);
if (error)
- goto error_put_device;
+ return error;
error = register_queue_kobjects(ndev);
- if (error)
- goto error_device_del;
+ if (error) {
+ device_del(dev);
+ return error;
+ }
pm_runtime_set_memalloc_noio(dev, true);
- return 0;
-
-error_device_del:
- device_del(dev);
-error_put_device:
- put_device(dev);
return error;
}
peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
nla = tb[NETNSA_FD];
} else if (tb[NETNSA_NSID]) {
- peer = get_net_ns_by_id(net, nla_get_u32(tb[NETNSA_NSID]));
+ peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
if (!peer)
peer = ERR_PTR(-ENOENT);
nla = tb[NETNSA_NSID];
register_netdevice_notifier(&netprio_device_notifier);
return 0;
}
-
subsys_initcall(init_cgroup_netprio);
-MODULE_LICENSE("GPL v2");
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
};
- struct sock_fprog_kern ptp_prog = {
- .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
- };
+ struct sock_fprog_kern ptp_prog;
+
+ ptp_prog.len = ARRAY_SIZE(ptp_filter);
+ ptp_prog.filter = ptp_filter;
BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog));
}
{
struct if_stats_msg *ifsm;
- if (nlh->nlmsg_len < sizeof(*ifsm)) {
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) {
NL_SET_ERR_MSG(extack, "Invalid header for stats dump");
return -EINVAL;
}
}
EXPORT_SYMBOL(__alloc_skb);
+/* __build_skb_around - initialise an already-allocated sk_buff head around
+ * a caller-supplied data buffer.
+ *
+ * The usable size is @frag_size (or ksize(@data) when @frag_size is 0) minus
+ * the aligned size of struct skb_shared_info.
+ *
+ * Caller must provide SKB that is memset cleared: only the fields assigned
+ * below are written here.
+ *
+ * Returns @skb.
+ */
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size)
+{
+ struct skb_shared_info *shinfo;
+ unsigned int size = frag_size ? : ksize(data);
+
+ size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ /* Assumes caller memset cleared SKB */
+ skb->truesize = SKB_TRUESIZE(size);
+ refcount_set(&skb->users, 1);
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
+ skb->mac_header = (typeof(skb->mac_header))~0U;
+ skb->transport_header = (typeof(skb->transport_header))~0U;
+
+ /* make sure we initialize shinfo sequentially: everything before
+ * dataref is zeroed, then dataref itself is set to 1
+ */
+ shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+ atomic_set(&shinfo->dataref, 1);
+
+ return skb;
+}
+
/**
* __build_skb - build a network buffer
* @data: data buffer provided by caller
*/
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
- struct skb_shared_info *shinfo;
struct sk_buff *skb;
- unsigned int size = frag_size ? : ksize(data);
skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
- if (!skb)
+ if (unlikely(!skb))
return NULL;
- size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->truesize = SKB_TRUESIZE(size);
- refcount_set(&skb->users, 1);
- skb->head = data;
- skb->data = data;
- skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
- skb->mac_header = (typeof(skb->mac_header))~0U;
- skb->transport_header = (typeof(skb->transport_header))~0U;
-
- /* make sure we initialize shinfo sequentially */
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
- return skb;
+ return __build_skb_around(skb, data, frag_size);
}
/* build_skb() is wrapper over __build_skb(), that specifically
}
EXPORT_SYMBOL(build_skb);
+/**
+ * build_skb_around - build a network buffer around provided skb
+ * @skb: sk_buff provided by caller, must be memset cleared
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ *
+ * Wrapper over __build_skb_around(). When @frag_size is non-zero the skb
+ * is additionally flagged with head_frag, and with pfmemalloc if the head
+ * page of @data is a pfmemalloc page.
+ *
+ * Returns NULL if @skb is NULL, otherwise the initialised @skb.
+ */
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size)
+{
+ if (unlikely(!skb))
+ return NULL;
+
+ skb = __build_skb_around(skb, data, frag_size);
+
+ if (skb && frag_size) {
+ skb->head_frag = 1;
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
+ skb->pfmemalloc = 1;
+ }
+ return skb;
+}
+EXPORT_SYMBOL(build_skb_around);
+
#define NAPI_SKB_CACHE_SIZE 64
struct napi_alloc_cache {
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
- int mac_len;
+ int mac_len, meta_len;
+ void *meta;
if (skb_cow(skb, skb_headroom(skb)) < 0) {
kfree_skb(skb);
memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
mac_len - VLAN_HLEN - ETH_TLEN);
}
+
+ meta_len = skb_metadata_len(skb);
+ if (meta_len) {
+ meta = skb_metadata_end(skb) - meta_len;
+ memmove(meta + VLAN_HLEN, meta, meta_len);
+ }
+
skb->mac_header += VLAN_HLEN;
return skb;
}
tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
}
- if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+ if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
*(struct old_timeval32 *)optval = tv32;
return sizeof(tv32);
{
struct __kernel_sock_timeval tv;
- if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+ if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32;
if (optlen < sizeof(tv32))
}
EXPORT_SYMBOL(lock_sock_fast);
-int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+ bool timeval, bool time32)
{
- struct timeval tv;
+ struct sock *sk = sock->sk;
+ struct timespec64 ts;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sock_read_timestamp(sk));
- if (tv.tv_sec == -1)
+ ts = ktime_to_timespec64(sock_read_timestamp(sk));
+ if (ts.tv_sec == -1)
return -ENOENT;
- if (tv.tv_sec == 0) {
+ if (ts.tv_sec == 0) {
ktime_t kt = ktime_get_real();
- sock_write_timestamp(sk, kt);
- tv = ktime_to_timeval(kt);
+ sock_write_timestamp(sk, kt);
+ ts = ktime_to_timespec64(kt);
}
- return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestamp);
-int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
- struct timespec ts;
+ if (timeval)
+ ts.tv_nsec /= 1000;
- sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sock_read_timestamp(sk));
- if (ts.tv_sec == -1)
- return -ENOENT;
- if (ts.tv_sec == 0) {
- ktime_t kt = ktime_get_real();
- sock_write_timestamp(sk, kt);
- ts = ktime_to_timespec(sk->sk_stamp);
+#ifdef CONFIG_COMPAT_32BIT_TIME
+ if (time32)
+ return put_old_timespec32(&ts, userstamp);
+#endif
+#ifdef CONFIG_SPARC64
+ /* beware of padding in sparc64 timeval */
+ if (timeval && !in_compat_syscall()) {
+ struct __kernel_old_timeval __user tv = {
+ .tv_sec = ts.tv_sec,
+ .tv_usec = ts.tv_nsec,
+ };
+ if (copy_to_user(userstamp, &tv, sizeof(tv)))
+ return -EFAULT;
+ return 0;
}
- return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+#endif
+ return put_timespec64(&ts, userstamp);
}
-EXPORT_SYMBOL(sock_get_timestampns);
+EXPORT_SYMBOL(sock_gettstamp);
void sock_enable_timestamp(struct sock *sk, int flag)
{
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
.poll = dccp_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
/* FIXME: work on inet_listen to rename it to sock_common_listen */
.listen = inet_dccp_listen,
.shutdown = inet_shutdown,
.getname = inet6_getname,
.poll = dccp_poll,
.ioctl = inet6_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = inet_dccp_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
#include <net/dn_fib.h>
#include <net/dn_neigh.h>
#include <net/dn_dev.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#define RT_MIN_TABLE 1
desclen += typelen + 1;
}
- if (!namelen)
- namelen = strnlen(name, 256);
if (namelen < 3 || namelen > 255)
return -EINVAL;
desclen += namelen + 1;
hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \
hsr_netlink.o hsr_slave.o hsr_forward.o
-hsr-$(CONFIG_DEBUG_FS) += hsr_prp_debugfs.o
+hsr-$(CONFIG_DEBUG_FS) += hsr_debugfs.o
/*
- * hsr_prp_debugfs code
- * Copyright (C) 2017 Texas Instruments Incorporated
+ * hsr_debugfs code
+ * Copyright (C) 2019 Texas Instruments Incorporated
*
* Author(s):
- * Murali Karicheri <m-karicheri2@ti.com?
+ * Murali Karicheri <m-karicheri2@ti.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
}
-/* hsr_prp_node_table_show - Formats and prints node_table entries */
+/* hsr_node_table_show - Formats and prints node_table entries */
static int
-hsr_prp_node_table_show(struct seq_file *sfp, void *data)
+hsr_node_table_show(struct seq_file *sfp, void *data)
{
struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
struct hsr_node *node;
return 0;
}
-/* hsr_prp_node_table_open - Open the node_table file
+/* hsr_node_table_open - Open the node_table file
*
* Description:
* This routine opens a debugfs file node_table of specific hsr device
*/
static int
-hsr_prp_node_table_open(struct inode *inode, struct file *filp)
+hsr_node_table_open(struct inode *inode, struct file *filp)
{
- return single_open(filp, hsr_prp_node_table_show, inode->i_private);
+ return single_open(filp, hsr_node_table_show, inode->i_private);
}
-static const struct file_operations hsr_prp_fops = {
+static const struct file_operations hsr_fops = {
.owner = THIS_MODULE,
- .open = hsr_prp_node_table_open,
+ .open = hsr_node_table_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
-/* hsr_prp_debugfs_init - create hsr-prp node_table file for dumping
+/* hsr_debugfs_init - create hsr node_table file for dumping
* the node table
*
* Description:
* When debugfs is configured this routine sets up the node_table file per
- * hsr/prp device for dumping the node_table entries
+ * hsr device for dumping the node_table entries
*/
-int hsr_prp_debugfs_init(struct hsr_priv *priv)
+int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
{
int rc = -1;
struct dentry *de = NULL;
- de = debugfs_create_dir("hsr", NULL);
+ de = debugfs_create_dir(hsr_dev->name, NULL);
if (!de) {
- pr_err("Cannot create hsr-prp debugfs root\n");
+ pr_err("Cannot create hsr debugfs root\n");
return rc;
}
de = debugfs_create_file("node_table", S_IFREG | 0444,
priv->node_tbl_root, priv,
- &hsr_prp_fops);
+ &hsr_fops);
if (!de) {
- pr_err("Cannot create hsr-prp node_table directory\n");
+ pr_err("Cannot create hsr node_table directory\n");
return rc;
}
priv->node_tbl_file = de;
- rc = 0;
- return rc;
+ return 0;
}
-/* hsr_prp_debugfs_term - Tear down debugfs intrastructure
+/* hsr_debugfs_term - Tear down debugfs infrastructure
*
* Description:
* When Debufs is configured this routine removes debugfs file system
- * elements that are specific to hsr-prp
+ * elements that are specific to hsr
*/
void
-hsr_prp_debugfs_term(struct hsr_priv *priv)
+hsr_debugfs_term(struct hsr_priv *priv)
{
debugfs_remove(priv->node_tbl_file);
priv->node_tbl_file = NULL;
hsr = netdev_priv(hsr_dev);
- hsr_prp_debugfs_term(hsr);
+ hsr_debugfs_term(hsr);
rtnl_lock();
hsr_for_each_port(hsr, port)
goto fail;
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
- res = hsr_prp_debugfs_init(hsr);
+ res = hsr_debugfs_init(hsr, hsr_dev);
if (res)
goto fail;
goto out_drop;
hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
hsr_forward_do(&frame);
+ /* Gets called for ingress frames as well as egress from master port.
+ * So check and increment stats for master port only here.
+ */
+ if (port->type == HSR_PT_MASTER) {
+ port->dev->stats.tx_packets++;
+ port->dev->stats.tx_bytes += skb->len;
+ }
if (frame.skb_hsr)
kfree_skb(frame.skb_hsr);
}
#if IS_ENABLED(CONFIG_DEBUG_FS)
-int hsr_prp_debugfs_init(struct hsr_priv *priv);
-void hsr_prp_debugfs_term(struct hsr_priv *priv);
+int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
+void hsr_debugfs_term(struct hsr_priv *priv);
#else
-static inline int hsr_prp_debugfs_init(struct hsr_priv *priv)
+static inline int hsr_debugfs_init(struct hsr_priv *priv,
+ struct net_device *hsr_dev)
{
return 0;
}
-static inline void hsr_prp_debugfs_term(struct hsr_priv *priv)
+static inline void hsr_debugfs_term(struct hsr_priv *priv)
{}
#endif
struct sock *sk = sock->sk;
switch (cmd) {
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
case SIOCGIFADDR:
case SIOCSIFADDR:
return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg,
.getname = sock_no_getname,
.poll = datagram_poll,
.ioctl = ieee802154_sock_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_common_setsockopt,
.getname = sock_no_getname,
.poll = datagram_poll,
.ioctl = ieee802154_sock_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_common_setsockopt,
struct rtentry rt;
switch (cmd) {
- case SIOCGSTAMP:
- err = sock_get_timestamp(sk, (struct timeval __user *)arg);
- break;
- case SIOCGSTAMPNS:
- err = sock_get_timestampns(sk, (struct timespec __user *)arg);
- break;
case SIOCADDRT:
case SIOCDELRT:
if (copy_from_user(&rt, p, sizeof(struct rtentry)))
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getname = inet_getname,
.poll = udp_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getname = inet_getname,
.poll = datagram_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
return 0;
}
-
-module_init(bpfilter_sockopt_init);
+device_initcall(bpfilter_sockopt_init);
if (rt->rt_gateway.sa_family == AF_INET && addr) {
unsigned int addr_type;
- cfg->fc_gw = addr;
+ cfg->fc_gw4 = addr;
+ cfg->fc_gw_family = AF_INET;
addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
if (rt->rt_flags & RTF_GATEWAY &&
addr_type == RTN_UNICAST)
if (cmd == SIOCDELRT)
return 0;
- if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+ if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
return -EINVAL;
if (cfg->fc_scope == RT_SCOPE_NOWHERE)
[RTA_DPORT] = { .type = NLA_U16 },
};
+/* fib_gw_from_via - fill the gateway fields of @cfg from an RTA_VIA attribute
+ * @cfg: route configuration; fc_gw_family plus fc_gw4 or fc_gw6 are set
+ * @nla: attribute whose payload is read as a struct rtvia
+ * @extack: extended ack that receives the reason when the attribute is rejected
+ *
+ * The payload must be at least as long as the rtvia header, and the address
+ * that follows must be exactly a __be32 for AF_INET or a struct in6_addr for
+ * AF_INET6. AF_INET6 is only accepted when CONFIG_IPV6 is defined; any other
+ * family is rejected.
+ *
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+ struct netlink_ext_ack *extack)
+{
+ struct rtvia *via;
+ int alen;
+
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+ NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
+ return -EINVAL;
+ }
+
+ via = nla_data(nla);
+ /* length of the address that follows the rtvia header */
+ alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
+
+ switch (via->rtvia_family) {
+ case AF_INET:
+ if (alen != sizeof(__be32)) {
+ NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
+ return -EINVAL;
+ }
+ cfg->fc_gw_family = AF_INET;
+ cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
+ break;
+ case AF_INET6:
+#ifdef CONFIG_IPV6
+ if (alen != sizeof(struct in6_addr)) {
+ NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
+ return -EINVAL;
+ }
+ cfg->fc_gw_family = AF_INET6;
+ cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
+#else
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EINVAL;
+#endif
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
struct nlmsghdr *nlh, struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
+ bool has_gw = false, has_via = false;
struct nlattr *attr;
int err, remaining;
struct rtmsg *rtm;
cfg->fc_oif = nla_get_u32(attr);
break;
case RTA_GATEWAY:
- cfg->fc_gw = nla_get_be32(attr);
+ has_gw = true;
+ cfg->fc_gw4 = nla_get_be32(attr);
+ if (cfg->fc_gw4)
+ cfg->fc_gw_family = AF_INET;
break;
case RTA_VIA:
- NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
- err = -EINVAL;
- goto errout;
+ has_via = true;
+ err = fib_gw_from_via(cfg, attr, extack);
+ if (err)
+ goto errout;
+ break;
case RTA_PRIORITY:
cfg->fc_priority = nla_get_u32(attr);
break;
}
}
+ if (has_gw && has_via) {
+ /* err holds no error code at this point (every failing
+ * attribute above already jumped to errout), so it must be
+ * set here or the conflicting request is reported as success.
+ */
+ NL_SET_ERR_MSG(extack,
+ "Nexthop configuration can not contain both GATEWAY and VIA");
+ err = -EINVAL;
+ goto errout;
+ }
+
return 0;
errout:
return err;
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
#include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
#include <net/addrconf.h>
for_nexthops(fi) {
if (nh->fib_nh_oif != onh->fib_nh_oif ||
- nh->fib_nh_gw4 != onh->fib_nh_gw4 ||
+ nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
nh->fib_nh_scope != onh->fib_nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->fib_nh_weight != onh->fib_nh_weight ||
lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
return -1;
+
+ if (nh->fib_nh_gw_family == AF_INET &&
+ nh->fib_nh_gw4 != onh->fib_nh_gw4)
+ return -1;
+
+ if (nh->fib_nh_gw_family == AF_INET6 &&
+ ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
+ return -1;
+
onh++;
} endfor_nexthops(fi);
return 0;
struct fib_info **last_resort, int *last_idx,
int dflt)
{
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
struct neighbour *n;
int state = NUD_NONE;
- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].fib_nh_gw4, fi->fib_dev);
+ if (likely(nhc->nhc_gw_family == AF_INET))
+ n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
+ else if (nhc->nhc_gw_family == AF_INET6)
+ n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
+ nhc->nhc_dev);
+ else
+ n = NULL;
+
if (n) {
state = n->nud_state;
neigh_release(n);
goto init_failure;
nh->fib_nh_oif = cfg->fc_oif;
- if (cfg->fc_gw) {
- nh->fib_nh_gw4 = cfg->fc_gw;
- nh->fib_nh_has_gw = 1;
- }
+ nh->fib_nh_gw_family = cfg->fc_gw_family;
+ if (cfg->fc_gw_family == AF_INET)
+ nh->fib_nh_gw4 = cfg->fc_gw4;
+ else if (cfg->fc_gw_family == AF_INET6)
+ nh->fib_nh_gw6 = cfg->fc_gw6;
+
nh->fib_nh_flags = cfg->fc_flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
- struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+ struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
- if (nla)
- fib_cfg.fc_gw = nla_get_in_addr(nla);
+ nlav = nla_find(attrs, attrlen, RTA_VIA);
+ if (nla && nlav) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop configuration can not contain both GATEWAY and VIA");
+ return -EINVAL;
+ }
+ if (nla) {
+ fib_cfg.fc_gw4 = nla_get_in_addr(nla);
+ if (fib_cfg.fc_gw4)
+ fib_cfg.fc_gw_family = AF_INET;
+ } else if (nlav) {
+ ret = fib_gw_from_via(&fib_cfg, nlav, extack);
+ if (ret)
+ goto errout;
+ }
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla)
"Nexthop device index does not match RTA_OIF");
goto errout;
}
- if (cfg->fc_gw && fi->fib_nh->fib_nh_gw4 != cfg->fc_gw) {
- NL_SET_ERR_MSG(extack,
- "Nexthop gateway does not match RTA_GATEWAY");
- goto errout;
+ if (cfg->fc_gw_family) {
+ if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
+ (cfg->fc_gw_family == AF_INET &&
+ fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
+ (cfg->fc_gw_family == AF_INET6 &&
+ ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
+ goto errout;
+ }
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
- if (cfg->fc_oif || cfg->fc_gw) {
+ if (cfg->fc_oif || cfg->fc_gw_family) {
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
fi->fib_nh, cfg, extack))
cfg->fc_flow != fi->fib_nh->nh_tclassid)
return 1;
#endif
- if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->fib_nh_oif) &&
- (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->fib_nh_gw4))
- return 0;
- return 1;
+ if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
+ (cfg->fc_gw_family &&
+ cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
+ return 1;
+
+ if (cfg->fc_gw_family == AF_INET &&
+ cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
+ return 1;
+
+ if (cfg->fc_gw_family == AF_INET6 &&
+ ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
+ return 1;
+
+ return 0;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
- struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+ struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
- if (nla && nla_get_in_addr(nla) != nh->fib_nh_gw4)
- return 1;
+ nlav = nla_find(attrs, attrlen, RTA_VIA);
+ if (nla && nlav) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop configuration can not contain both GATEWAY and VIA");
+ return -EINVAL;
+ }
+
+ if (nla) {
+ if (nh->fib_nh_gw_family != AF_INET ||
+ nla_get_in_addr(nla) != nh->fib_nh_gw4)
+ return 1;
+ } else if (nlav) {
+ struct fib_config cfg2;
+ int err;
+
+ err = fib_gw_from_via(&cfg2, nlav, extack);
+ if (err)
+ return err;
+
+ switch (nh->fib_nh_gw_family) {
+ case AF_INET:
+ if (cfg2.fc_gw_family != AF_INET ||
+ cfg2.fc_gw4 != nh->fib_nh_gw4)
+ return 1;
+ break;
+ case AF_INET6:
+ if (cfg2.fc_gw_family != AF_INET6 ||
+ ipv6_addr_cmp(&cfg2.fc_gw6,
+ &nh->fib_nh_gw6))
+ return 1;
+ break;
+ }
+ }
+
#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla && nla_get_u32(nla) != nh->nh_tclassid)
return true;
}
+/* fib_check_nh_v6_gw - validate the IPv6 gateway of nexthop @nh
+ *
+ * Builds a temporary fib6_config from @table and @nh (flags with RTF_GATEWAY
+ * added, oif, gateway address) and passes it to ipv6_stub->fib6_nh_init().
+ * On success the device chosen by that call is stored in @nh with a
+ * reference taken via dev_hold(), fib_nh_oif is set to that device's
+ * ifindex and the scope to RT_SCOPE_LINK; the temporary fib6_nh is then
+ * released.
+ *
+ * Returns the value returned by fib6_nh_init().
+ */
+static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
+ u32 table, struct netlink_ext_ack *extack)
+{
+ struct fib6_config cfg = {
+ .fc_table = table,
+ .fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
+ .fc_ifindex = nh->fib_nh_oif,
+ .fc_gateway = nh->fib_nh_gw6,
+ };
+ struct fib6_nh fib6_nh = {};
+ int err;
+
+ err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
+ if (!err) {
+ nh->fib_nh_dev = fib6_nh.fib_nh_dev;
+ dev_hold(nh->fib_nh_dev);
+ nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
+ nh->fib_nh_scope = RT_SCOPE_LINK;
+
+ ipv6_stub->fib6_nh_release(&fib6_nh);
+ }
+
+ return err;
+}
/*
* Picture
* |
* |-> {local prefix} (terminal node)
*/
-static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
- struct netlink_ext_ack *extack)
+static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
+ u8 scope, struct netlink_ext_ack *extack)
{
- int err = 0;
- struct net *net;
struct net_device *dev;
+ struct fib_result res;
+ int err;
- net = cfg->fc_nlinfo.nl_net;
- if (nh->fib_nh_gw4) {
- struct fib_result res;
-
- if (nh->fib_nh_flags & RTNH_F_ONLINK) {
- unsigned int addr_type;
+ if (nh->fib_nh_flags & RTNH_F_ONLINK) {
+ unsigned int addr_type;
- if (cfg->fc_scope >= RT_SCOPE_LINK) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid scope");
- return -EINVAL;
- }
- dev = __dev_get_by_index(net, nh->fib_nh_oif);
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
- return -ENODEV;
- }
- if (!(dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack,
- "Nexthop device is not up");
- return -ENETDOWN;
- }
- addr_type = inet_addr_type_dev_table(net, dev,
- nh->fib_nh_gw4);
- if (addr_type != RTN_UNICAST) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid gateway");
- return -EINVAL;
- }
- if (!netif_carrier_ok(dev))
- nh->fib_nh_flags |= RTNH_F_LINKDOWN;
- nh->fib_nh_dev = dev;
- dev_hold(dev);
- nh->fib_nh_scope = RT_SCOPE_LINK;
- return 0;
+ if (scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
+ return -EINVAL;
}
- rcu_read_lock();
- {
- struct fib_table *tbl = NULL;
- struct flowi4 fl4 = {
- .daddr = nh->fib_nh_gw4,
- .flowi4_scope = cfg->fc_scope + 1,
- .flowi4_oif = nh->fib_nh_oif,
- .flowi4_iif = LOOPBACK_IFINDEX,
- };
-
- /* It is not necessary, but requires a bit of thinking */
- if (fl4.flowi4_scope < RT_SCOPE_LINK)
- fl4.flowi4_scope = RT_SCOPE_LINK;
-
- if (cfg->fc_table)
- tbl = fib_get_table(net, cfg->fc_table);
-
- if (tbl)
- err = fib_table_lookup(tbl, &fl4, &res,
- FIB_LOOKUP_IGNORE_LINKSTATE |
- FIB_LOOKUP_NOREF);
-
- /* on error or if no table given do full lookup. This
- * is needed for example when nexthops are in the local
- * table rather than the given table
- */
- if (!tbl || err) {
- err = fib_lookup(net, &fl4, &res,
- FIB_LOOKUP_IGNORE_LINKSTATE);
- }
-
- if (err) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid gateway");
- rcu_read_unlock();
- return err;
- }
+ dev = __dev_get_by_index(net, nh->fib_nh_oif);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
+ return -ENODEV;
}
- err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
- NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
- goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ return -ENETDOWN;
}
- nh->fib_nh_scope = res.scope;
- nh->fib_nh_oif = FIB_RES_OIF(res);
- nh->fib_nh_dev = dev = FIB_RES_DEV(res);
- if (!dev) {
- NL_SET_ERR_MSG(extack,
- "No egress device for nexthop gateway");
- goto out;
+ addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
+ if (addr_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ return -EINVAL;
}
- dev_hold(dev);
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
- err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
- } else {
- struct in_device *in_dev;
-
- if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
- NL_SET_ERR_MSG(extack,
- "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
- return -EINVAL;
+ nh->fib_nh_dev = dev;
+ dev_hold(dev);
+ nh->fib_nh_scope = RT_SCOPE_LINK;
+ return 0;
+ }
+ rcu_read_lock();
+ {
+ struct fib_table *tbl = NULL;
+ struct flowi4 fl4 = {
+ .daddr = nh->fib_nh_gw4,
+ .flowi4_scope = scope + 1,
+ .flowi4_oif = nh->fib_nh_oif,
+ .flowi4_iif = LOOPBACK_IFINDEX,
+ };
+
+ /* It is not necessary, but requires a bit of thinking */
+ if (fl4.flowi4_scope < RT_SCOPE_LINK)
+ fl4.flowi4_scope = RT_SCOPE_LINK;
+
+ if (table)
+ tbl = fib_get_table(net, table);
+
+ if (tbl)
+ err = fib_table_lookup(tbl, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE |
+ FIB_LOOKUP_NOREF);
+
+ /* on error or if no table given do full lookup. This
+ * is needed for example when nexthops are in the local
+ * table rather than the given table
+ */
+ if (!tbl || err) {
+ err = fib_lookup(net, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE);
}
- rcu_read_lock();
- err = -ENODEV;
- in_dev = inetdev_by_index(net, nh->fib_nh_oif);
- if (!in_dev)
- goto out;
- err = -ENETDOWN;
- if (!(in_dev->dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
goto out;
}
- nh->fib_nh_dev = in_dev->dev;
- dev_hold(nh->fib_nh_dev);
- nh->fib_nh_scope = RT_SCOPE_HOST;
- if (!netif_carrier_ok(nh->fib_nh_dev))
- nh->fib_nh_flags |= RTNH_F_LINKDOWN;
- err = 0;
}
+
+ err = -EINVAL;
+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ goto out;
+ }
+ nh->fib_nh_scope = res.scope;
+ nh->fib_nh_oif = FIB_RES_OIF(res);
+ nh->fib_nh_dev = dev = FIB_RES_DEV(res);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "No egress device for nexthop gateway");
+ goto out;
+ }
+ dev_hold(dev);
+ if (!netif_carrier_ok(dev))
+ nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+/* fib_check_nh_nongw - validate nexthop @nh when fib_check_nh() finds a
+ * gateway family that is neither AF_INET nor AF_INET6.
+ *
+ * Returns -EINVAL if RTNH_F_PERVASIVE or RTNH_F_ONLINK is set. Otherwise the
+ * device for nh->fib_nh_oif is looked up under rcu_read_lock(): -ENODEV if
+ * inetdev_by_index() finds nothing, -ENETDOWN if the device is not IFF_UP.
+ * On success (0) the device is stored in @nh with a reference taken via
+ * dev_hold(), the scope is set to RT_SCOPE_HOST, and RTNH_F_LINKDOWN is set
+ * when netif_carrier_ok() is false.
+ */
+static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
+{
+ struct in_device *in_dev;
+ int err;
+
+ if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+
+ err = -ENODEV;
+ in_dev = inetdev_by_index(net, nh->fib_nh_oif);
+ if (!in_dev)
+ goto out;
+ err = -ENETDOWN;
+ if (!(in_dev->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+ goto out;
+ }
+
+ nh->fib_nh_dev = in_dev->dev;
+ dev_hold(nh->fib_nh_dev);
+ nh->fib_nh_scope = RT_SCOPE_HOST;
+ if (!netif_carrier_ok(nh->fib_nh_dev))
+ nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ err = 0;
out:
rcu_read_unlock();
return err;
}
+/* fib_check_nh - validate nexthop @nh for the route described by @cfg
+ *
+ * Dispatches on nh->fib_nh_gw_family: AF_INET goes to fib_check_nh_v4_gw()
+ * (which also receives cfg->fc_scope), AF_INET6 to fib_check_nh_v6_gw(),
+ * and anything else to fib_check_nh_nongw(). The network namespace and
+ * table id are taken from @cfg.
+ *
+ * Returns the result of the selected helper.
+ */
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ u32 table = cfg->fc_table;
+ int err;
+
+ if (nh->fib_nh_gw_family == AF_INET)
+ err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
+ else if (nh->fib_nh_gw_family == AF_INET6)
+ err = fib_check_nh_v6_gw(net, nh, table, extack);
+ else
+ err = fib_check_nh_nongw(net, nh, extack);
+
+ return err;
+}
+
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
unsigned int mask = (fib_info_hash_size - 1);
goto failure;
if (fib_props[cfg->fc_type].error) {
- if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+ if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Gateway, device and multipath can not be specified for this route type");
goto err_inval;
"Route with host scope can not have multiple nexthops");
goto err_inval;
}
- if (nh->fib_nh_gw4) {
+ if (nh->fib_nh_gw_family) {
NL_SET_ERR_MSG(extack,
"Route with host scope can not have a gateway");
goto err_inval;
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
+ if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+ fi->fib_nh_is_v6 = true;
} endfor_nexthops(fi)
fib_rebalance(fi);
}
int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
- unsigned int *flags, bool skip_oif)
+ unsigned char *flags, bool skip_oif)
{
if (nhc->nhc_flags & RTNH_F_DEAD)
*flags |= RTNH_F_DEAD;
rcu_read_unlock();
}
- if (nhc->nhc_has_gw) {
- switch (nhc->nhc_family) {
- case AF_INET:
- if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
- goto nla_put_failure;
- break;
- case AF_INET6:
- if (nla_put_in6_addr(skb, RTA_GATEWAY,
- &nhc->nhc_gw.ipv6) < 0)
+ switch (nhc->nhc_gw_family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
+ goto nla_put_failure;
+ break;
+ case AF_INET6:
+ /* if gateway family does not match nexthop family
+ * gateway is encoded as RTA_VIA
+ */
+ if (nhc->nhc_gw_family != nhc->nhc_family) {
+ int alen = sizeof(struct in6_addr);
+ struct nlattr *nla;
+ struct rtvia *via;
+
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
+ if (!nla)
goto nla_put_failure;
- break;
+
+ via = nla_data(nla);
+ via->rtvia_family = AF_INET6;
+ memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
+ } else if (nla_put_in6_addr(skb, RTA_GATEWAY,
+ &nhc->nhc_gw.ipv6) < 0) {
+ goto nla_put_failure;
}
+ break;
}
*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
goto nla_put_failure;
if (nhc->nhc_lwtstate &&
- lwtunnel_fill_encap(skb, nhc->nhc_lwtstate) < 0)
+ lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
+ RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
return 0;
{
const struct net_device *dev = nhc->nhc_dev;
struct rtnexthop *rtnh;
- unsigned int flags = 0;
+ unsigned char flags = 0;
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
if (fi->fib_nhs == 1) {
struct fib_nh *nh = &fi->fib_nh[0];
- unsigned int flags = 0;
+ unsigned char flags = 0;
if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
goto nla_put_failure;
* Dead device goes up. We wake up dead nexthops.
* It takes sense only on multipath routes.
*/
-int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
+int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
unsigned int hash;
rcu_read_lock_bh();
- n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
- (__force u32)nh->fib_nh_gw4);
+ if (likely(nh->fib_nh_gw_family == AF_INET))
+ n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+ (__force u32)nh->fib_nh_gw4);
+ else if (nh->fib_nh_gw_family == AF_INET6)
+ n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
+ &nh->fib_nh_gw6);
+ else
+ n = NULL;
if (n)
state = n->nud_state;
struct guehdr *guehdr;
void *data;
u16 doffset = 0;
+ u8 proto_ctype;
if (!fou)
return 1;
break;
case 1: {
- /* Direct encasulation of IPv4 or IPv6 */
+ /* Direct encapsulation of IPv4 or IPv6 */
int prot;
/* guehdr may change after pull */
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- hdrlen = sizeof(struct guehdr) + optlen;
-
- if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
+ if (validate_gue_flags(guehdr, optlen))
goto drop;
hdrlen = sizeof(struct guehdr) + optlen;
if (unlikely(guehdr->control))
return gue_control_message(skb, guehdr);
+ proto_ctype = guehdr->proto_ctype;
__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
skb_reset_transport_header(skb);
if (iptunnel_pull_offloads(skb))
goto drop;
- return -guehdr->proto_ctype;
+ return -proto_ctype;
drop:
kfree_skb(skb);
case 0: /* Full GUE header present */
break;
case 1: {
- /* Direct encasulation of IPv4 or IPv6 */
+ /* Direct encapsulation of IPv4 or IPv6 */
skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
switch (((struct iphdr *)guehdr)->version) {
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
goto route_err;
rcu_read_unlock();
return &rt->dst;
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
goto route_err;
return &rt->dst;
rt = skb_rtable(skb);
- if (opt->is_strictroute && rt->rt_uses_gateway)
+ if (opt->is_strictroute && rt->rt_gw_family)
goto sr_failed;
IPCB(skb)->flags |= IPSKB_FORWARDED;
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb,
len,
htons(ETH_P_TEB),
goto drop;
if (tunnel->collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
md->version = ver;
md2 = &md->u.md2;
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
- u32 nexthop;
+ bool is_v6gw = false;
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
}
rcu_read_lock_bh();
- nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
- neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
int res;
sock_confirm_neigh(skb, neigh);
- res = neigh_output(neigh, skb);
-
+ /* if crossing protocols, can not use the cached header */
+ res = neigh_output(neigh, skb, is_v6gw);
rcu_read_unlock_bh();
return res;
}
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#include <linux/nospec.h>
if NF_TABLES_IPV4
-config NFT_CHAIN_ROUTE_IPV4
- tristate "IPv4 nf_tables route chain support"
- help
- This option enables the "route" chain for IPv4 in nf_tables. This
- chain type is used to force packet re-routing after mangling header
- fields such as the source, destination, type of service and
- the packet mark.
-
config NFT_REJECT_IPV4
select NF_REJECT_IPV4
default NFT_REJECT
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- select NF_NAT_MASQUERADE
- default m if NETFILTER_ADVANCED=n
+ select NETFILTER_XT_TARGET_MASQUERADE
help
- Masquerading is a special case of NAT: all outgoing connections are
- changed to seem to come from a particular interface's address, and
- if the interface goes down, those connections are lost. This is
- only useful for dialup accounts with dynamic IP address (ie. your IP
- address will be different on next dialup).
-
- To compile it as a module, choose M here. If unsure, say N.
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# targets
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
+++ /dev/null
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-static unsigned int nf_route_table_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- unsigned int ret;
- struct nft_pktinfo pkt;
- u32 mark;
- __be32 saddr, daddr;
- u_int8_t tos;
- const struct iphdr *iph;
- int err;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv4(&pkt, skb);
-
- mark = skb->mark;
- iph = ip_hdr(skb);
- saddr = iph->saddr;
- daddr = iph->daddr;
- tos = iph->tos;
-
- ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_STOLEN) {
- iph = ip_hdr(skb);
-
- if (iph->saddr != saddr ||
- iph->daddr != daddr ||
- skb->mark != mark ||
- iph->tos != tos) {
- err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
- }
- return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv4 = {
- .name = "route",
- .type = NFT_CHAIN_T_ROUTE,
- .family = NFPROTO_IPV4,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_OUT),
- .hooks = {
- [NF_INET_LOCAL_OUT] = nf_route_table_hook,
- },
-};
-
-static int __init nft_chain_route_init(void)
-{
- nft_register_chain_type(&nft_chain_route_ipv4);
-
- return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
- nft_unregister_chain_type(&nft_chain_route_ipv4);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
struct sk_buff *skb,
const void *daddr)
{
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
- const __be32 *pkey = daddr;
- const struct rtable *rt;
struct neighbour *n;
- rt = (const struct rtable *) dst;
- if (rt->rt_gateway)
- pkey = (const __be32 *) &rt->rt_gateway;
- else if (skb)
- pkey = &ip_hdr(skb)->daddr;
+ rcu_read_lock_bh();
+
+ if (likely(rt->rt_gw_family == AF_INET)) {
+ n = ip_neigh_gw4(dev, rt->rt_gw4);
+ } else if (rt->rt_gw_family == AF_INET6) {
+ n = ip_neigh_gw6(dev, &rt->rt_gw6);
+ } else {
+ __be32 pkey;
+
+ pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+ n = ip_neigh_gw4(dev, pkey);
+ }
+
+ if (n && !refcount_inc_not_zero(&n->refcnt))
+ n = NULL;
+
+ rcu_read_unlock_bh();
- n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
- if (n)
- return n;
- return neigh_create(&arp_tbl, pkey, dev);
+ return n;
}
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
- const struct rtable *rt;
- rt = (const struct rtable *)dst;
- if (rt->rt_gateway)
- pkey = (const __be32 *)&rt->rt_gateway;
- else if (!daddr ||
+ if (rt->rt_gw_family == AF_INET) {
+ pkey = (const __be32 *)&rt->rt_gw4;
+ } else if (rt->rt_gw_family == AF_INET6) {
+ return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
+ } else if (!daddr ||
(rt->rt_flags &
- (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
+ (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
return;
-
+ }
__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = fnhe->fnhe_gw;
- rt->rt_uses_gateway = 1;
+ rt->rt_gw_family = AF_INET;
+ rt->rt_gw4 = fnhe->fnhe_gw;
}
}
return;
}
- if (rt->rt_gateway != old_gw)
+ if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
return;
in_dev = __in_dev_get_rcu(dev);
return dst;
}
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
+{
+ struct ip_options opt;
+ int res;
+
+ /* Recompile ip options since IPCB may not be valid anymore.
+ * Also check we have a reasonable ipv4 header.
+ */
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+ ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+ return;
+
+ memset(&opt, 0, sizeof(opt));
+ if (ip_hdr(skb)->ihl > 5) {
+ if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+ return;
+ opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
+
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
+ return;
+ }
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
static void ipv4_link_failure(struct sk_buff *skb)
{
struct rtable *rt;
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ ipv4_send_dest_unreach(skb);
rt = skb_rtable(skb);
if (rt)
mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
- if (rt->rt_uses_gateway && mtu > 576)
+ if (rt->rt_gw_family && mtu > 576)
mtu = 576;
}
orig = NULL;
}
fill_route_from_fnhe(rt, fnhe);
- if (!rt->rt_gateway)
- rt->rt_gateway = daddr;
+ if (!rt->rt_gw4) {
+ rt->rt_gw4 = daddr;
+ rt->rt_gw_family = AF_INET;
+ }
if (do_cache) {
dst_hold(&rt->dst);
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
+ struct fib_nh *nh;
- if (nh->fib_nh_gw4 && nh->fib_nh_scope == RT_SCOPE_LINK) {
- rt->rt_gateway = nh->fib_nh_gw4;
- rt->rt_uses_gateway = 1;
+ if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+ rt->rt_gw_family = nhc->nhc_gw_family;
+ /* only INET and INET6 are supported */
+ if (likely(nhc->nhc_gw_family == AF_INET))
+ rt->rt_gw4 = nhc->nhc_gw.ipv4;
+ else
+ rt->rt_gw6 = nhc->nhc_gw.ipv6;
}
+
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
+ nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
* However, if we are unsuccessful at storing this
* route into the cache we really need to set it.
*/
- if (!rt->rt_gateway)
- rt->rt_gateway = daddr;
+ if (!rt->rt_gw4) {
+ rt->rt_gw_family = AF_INET;
+ rt->rt_gw4 = daddr;
+ }
rt_add_uncached_list(rt);
}
} else
rt->rt_iif = 0;
rt->rt_pmtu = 0;
rt->rt_mtu_locked = 0;
- rt->rt_gateway = 0;
- rt->rt_uses_gateway = 0;
+ rt->rt_gw_family = 0;
+ rt->rt_gw4 = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
do_cache = res->fi && !itag;
if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
skb->protocol == htons(ETH_P_IP)) {
- __be32 gw = nhc->nhc_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
+ __be32 gw;
+ gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
if (IN_DEV_SHARED_MEDIA(out_dev) ||
inet_addr_onlink(out_dev, saddr, gw))
IPCB(skb)->flags |= IPSKB_DOREDIRECT;
} else {
if (unlikely(fl4->flowi4_flags &
FLOWI_FLAG_KNOWN_NH &&
- !(nhc->nhc_has_gw &&
+ !(nhc->nhc_gw_family &&
nhc->nhc_scope == RT_SCOPE_LINK))) {
do_cache = false;
goto add;
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
- rt->rt_gateway = ort->rt_gateway;
- rt->rt_uses_gateway = ort->rt_uses_gateway;
+ rt->rt_gw_family = ort->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ rt->rt_gw4 = ort->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ rt->rt_gw6 = ort->rt_gw6;
INIT_LIST_HEAD(&rt->rt_uncached);
}
if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (rt->rt_uses_gateway &&
- nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
+ if (rt->rt_gw_family == AF_INET &&
+ nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
goto nla_put_failure;
+ } else if (rt->rt_gw_family == AF_INET6) {
+ int alen = sizeof(struct in6_addr);
+ struct nlattr *nla;
+ struct rtvia *via;
+
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
+ if (!nla)
+ goto nla_put_failure;
+
+ via = nla_data(nla);
+ via->rtvia_family = AF_INET6;
+ memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+ }
expires = rt->dst.expires;
if (expires) {
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static int comp_sack_nr_max = 255;
static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one_day_secs
},
{
.procname = "tcp_autocorking",
if (likely(!size)) {
skb = sk->sk_tx_skb_cache;
if (skb && !skb_cloned(skb)) {
- skb->truesize -= skb->data_len;
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
sk->sk_tx_skb_cache = NULL;
pskb_trim(skb, 0);
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
#define DCTCP_MAX_ALPHA 1024U
struct dctcp {
- u32 acked_bytes_ecn;
- u32 acked_bytes_total;
- u32 prior_snd_una;
+ u32 old_delivered;
+ u32 old_delivered_ce;
u32 prior_rcv_nxt;
u32 dctcp_alpha;
u32 next_seq;
{
ca->next_seq = tp->snd_nxt;
- ca->acked_bytes_ecn = 0;
- ca->acked_bytes_total = 0;
+ ca->old_delivered = tp->delivered;
+ ca->old_delivered_ce = tp->delivered_ce;
}
static void dctcp_init(struct sock *sk)
sk->sk_state == TCP_CLOSE)) {
struct dctcp *ca = inet_csk_ca(sk);
- ca->prior_snd_una = tp->snd_una;
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
- u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
-
- /* If ack did not advance snd_una, count dupack as MSS size.
- * If ack did update window, do not count it at all.
- */
- if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
- acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
- if (acked_bytes) {
- ca->acked_bytes_total += acked_bytes;
- ca->prior_snd_una = tp->snd_una;
-
- if (flags & CA_ACK_ECE)
- ca->acked_bytes_ecn += acked_bytes;
- }
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
- u64 bytes_ecn = ca->acked_bytes_ecn;
+ u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
u32 alpha = ca->dctcp_alpha;
/* alpha = (1 - g) * alpha + g * F */
alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
- if (bytes_ecn) {
+ if (delivered_ce) {
+ u32 delivered = tp->delivered - ca->old_delivered;
+
/* If dctcp_shift_g == 1, a 32bit value would overflow
- * after 8 Mbytes.
+ * after 8 M packets.
*/
- bytes_ecn <<= (10 - dctcp_shift_g);
- do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
+ delivered_ce <<= (10 - dctcp_shift_g);
+ delivered_ce /= max(1U, delivered);
- alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
+ alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
}
/* dctcp_alpha can be read from dctcp_get_info() without
* synchro, so we ask compiler to not use dctcp_alpha
union tcp_cc_info *info)
{
const struct dctcp *ca = inet_csk_ca(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Fill it also in case of VEGASINFO due to req struct limits.
* We can still correctly retrieve it later.
info->dctcp.dctcp_enabled = 1;
info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
info->dctcp.dctcp_alpha = ca->dctcp_alpha;
- info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
- info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
+ info->dctcp.dctcp_ab_ecn = tp->mss_cache *
+ (tp->delivered_ce - ca->old_delivered_ce);
+ info->dctcp.dctcp_ab_tot = tp->mss_cache *
+ (tp->delivered - ca->old_delivered);
}
*attr = INET_DIAG_DCTCPINFO;
static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int room;
+
+ room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
/* Check #1 */
- if (tp->rcv_ssthresh < tp->window_clamp &&
- (int)tp->rcv_ssthresh < tcp_space(sk) &&
- !tcp_under_memory_pressure(sk)) {
+ if (room > 0 && !tcp_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
- tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
- tp->window_clamp);
+ tp->rcv_ssthresh += min(room, incr);
inet_csk(sk)->icsk_ack.quick |= 1;
}
}
EXPORT_SYMBOL(udp_ioctl);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *peeked, int *off, int *err)
+ int noblock, int *off, int *err)
{
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff_head *queue;
break;
error = -EAGAIN;
- *peeked = 0;
do {
spin_lock_bh(&queue->lock);
skb = __skb_try_recv_from_queue(sk, queue, flags,
udp_skb_destructor,
- peeked, off, err,
- &last);
+ off, err, &last);
if (skb) {
spin_unlock_bh(&queue->lock);
return skb;
skb = __skb_try_recv_from_queue(sk, queue, flags,
udp_skb_dtor_locked,
- peeked, off, err,
- &last);
+ off, err, &last);
spin_unlock(&sk_queue->lock);
spin_unlock_bh(&queue->lock);
if (skb)
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct sk_buff *skb;
unsigned int ulen, copied;
- int peeked, peeking, off;
- int err;
+ int off, err, peeking = flags & MSG_PEEK;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
return ip_recv_error(sk, msg, len, addr_len);
try_again:
- peeking = flags & MSG_PEEK;
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+ skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
if (!skb)
return err;
}
if (unlikely(err)) {
- if (!peeked) {
+ if (!peeking) {
atomic_inc(&sk->sk_drops);
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
return err;
}
- if (!peeked)
+ if (!peeking)
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
- xdst->u.rt.rt_gateway = rt->rt_gateway;
- xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
+ xdst->u.rt.rt_gw_family = rt->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ xdst->u.rt.rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ xdst->u.rt.rt_gw6 = rt->rt_gw6;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
continue;
- if (no_gw && rt->fib6_nh.fib_nh_has_gw)
+ if (no_gw && rt->fib6_nh.fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
return NULL;
}
-static struct fib6_info *
+static int
eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- return NULL;
+ return -EAFNOSUPPORT;
}
-static struct fib6_info *
+static int
eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+ struct fib6_result *res, int flags)
{
- return NULL;
+ return -EAFNOSUPPORT;
}
-static struct fib6_info *
-eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb, int strict)
+static void
+eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict)
{
- return f6i;
}
static u32
-eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr)
+eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
return 0;
}
+static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
- .fib6_multipath_select = eafnosupport_fib6_multipath_select,
+ .fib6_select_path = eafnosupport_fib6_select_path,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
+ .fib6_nh_init = eafnosupport_fib6_nh_init,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
}
if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
return -EINVAL;
}
struct net *net = sock_net(sk);
switch (cmd) {
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
case SIOCADDRT:
case SIOCDELRT:
.getname = inet6_getname,
.poll = tcp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = inet_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
.getname = inet6_getname,
.poll = udp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+
+ /* By default, rate limit error messages.
+ * Except for pmtu discovery, it would break it.
+ * proc_do_large_bitmap needs pointer to the bitmap.
+ */
+ bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
+ bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
+ net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
+
net->ipv6.sysctl.flowlabel_consistency = 1;
net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
net->ipv6.sysctl.idgen_retries = 3;
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
- .fib6_multipath_select = fib6_multipath_select,
+ .fib6_select_path = fib6_select_path,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
+ .fib6_nh_init = fib6_nh_init,
+ .fib6_nh_release = fib6_nh_release,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
}
/* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- struct fib6_info *f6i;
int err;
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = fib6_table_lookup,
.lookup_data = &oif,
+ .result = res,
.flags = FIB_LOOKUP_NOREF,
};
err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (err)
- return ERR_PTR(err);
-
- f6i = arg.result ? : net->ipv6.fib6_null_entry;
} else {
- f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
- oif, fl6, flags);
- if (!f6i || f6i == net->ipv6.fib6_null_entry)
- f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
- oif, fl6, flags);
+ err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif,
+ fl6, res, flags);
+ if (err || res->f6i == net->ipv6.fib6_null_entry)
+ err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+ oif, fl6, res, flags);
}
- return f6i;
+ return err;
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
+ struct fib6_result res = {};
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.lookup_data = skb,
+ .result = &res,
.flags = FIB_LOOKUP_NOREF,
};
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (arg.result)
- return arg.result;
+ if (res.rt6)
+ return &res.rt6->dst;
} else {
struct rt6_info *rt;
static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct net *net = rule->fr_net;
struct fib6_table *table;
- struct fib6_info *f6i;
- int err = -EAGAIN, *oif;
+ int err, *oif;
u32 tb_id;
switch (rule->action) {
return -EAGAIN;
oif = (int *)arg->lookup_data;
- f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
- if (f6i != net->ipv6.fib6_null_entry) {
+ err = fib6_table_lookup(net, table, *oif, flp6, res, flags);
+ if (!err && res->f6i != net->ipv6.fib6_null_entry)
err = fib6_rule_saddr(net, rule, flags, flp6,
- fib6_info_nh_dev(f6i));
-
- if (likely(!err))
- arg->result = f6i;
- }
+ res->nh->fib_nh_dev);
+ else
+ err = -EAGAIN;
return err;
}
static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
struct fib6_table *table;
discard_pkt:
dst_hold(&rt->dst);
out:
- arg->result = rt;
+ res->rt6 = rt;
return err;
}
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
- struct rt6_info *rt = (struct rt6_info *) arg->result;
+ struct fib6_result *res = arg->result;
+ struct rt6_info *rt = res->rt6;
struct net_device *dev = NULL;
+ if (!rt)
+ return false;
+
if (rt->rt6i_idev)
dev = rt->rt6i_idev->dev;
return false;
}
-static bool icmpv6_mask_allow(int type)
+static bool icmpv6_mask_allow(struct net *net, int type)
{
- /* Informational messages are not limited. */
- if (type & ICMPV6_INFOMSG_MASK)
+ if (type > ICMPV6_MSG_MAX)
return true;
- /* Do not limit pmtu discovery, it would break it. */
- if (type == ICMPV6_PKT_TOOBIG)
+ /* Limit if icmp type is set in ratemask. */
+ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
return true;
return false;
}
-static bool icmpv6_global_allow(int type)
+static bool icmpv6_global_allow(struct net *net, int type)
{
- if (icmpv6_mask_allow(type))
+ if (icmpv6_mask_allow(net, type))
return true;
if (icmp_global_allow())
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(type))
+ if (icmpv6_mask_allow(net, type))
return true;
/*
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
goto out_bh_enable;
mip6_addr_swap(skb);
if (IS_ERR(dst))
goto out;
+ /* Check the ratelimit */
+ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ goto out_dst_release;
+
idev = __in6_dev_get(skb->dev);
msg.skb = skb;
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
skb->len + sizeof(struct icmp6hdr));
}
+out_dst_release:
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "ratemask",
+ .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
+ .maxlen = ICMPV6_MSG_MAX + 1,
+ .mode = 0644,
+ .proc_handler = proc_do_large_bitmap,
+ },
{ },
};
table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
+ table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
}
return table;
}
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
- atomic_inc(&f6i->fib6_ref);
+ refcount_set(&f6i->fib6_ref, 1);
return f6i;
}
WARN_ON(f6i->fib6_node);
bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- if (bucket) {
- f6i->rt6i_exception_bucket = NULL;
- kfree(bucket);
- }
+ kfree(bucket);
if (f6i->rt6i_pcpu) {
int cpu;
}
/* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+ return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
+ res, flags);
}
static void __net_init fib6_tables_init(struct net *net)
RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&rcu_dereference_protected(in->leaf,
- lockdep_is_held(&table->tb6_lock))->fib6_ref);
+ fib6_info_hold(rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock)));
/* update parent pointer */
if (dir)
{
struct fib6_table *table = rt->fib6_table;
- if (atomic_read(&rt->fib6_ref) != 1) {
+ if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
* which must be released in time. So, scan ascendant nodes
struct fib6_info *new_leaf;
if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
new_leaf = fib6_find_prefix(net, table, fn);
- atomic_inc(&new_leaf->fib6_ref);
+ fib6_info_hold(new_leaf);
rcu_assign_pointer(fn->leaf, new_leaf);
fib6_info_release(rt);
return err;
rcu_assign_pointer(rt->fib6_next, iter);
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
rcu_assign_pointer(*ins, rt);
if (!info->skip_notify)
if (err)
return err;
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);
if (!sfn)
goto failure;
- atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
+ fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
rcu_assign_pointer(sfn->leaf,
info->nl_net->ipv6.fib6_null_entry);
sfn->fn_flags = RTN_ROOT;
rcu_assign_pointer(fn->leaf,
info->nl_net->ipv6.fib6_null_entry);
} else {
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(fn->leaf, rt);
}
}
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh.fib_nh_has_gw) {
+ if (rt->fib6_nh.fib_nh_gw_family) {
flags |= RTF_GATEWAY;
seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
} else {
dev = rt->fib6_nh.fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
- rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
+ rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
iter->w.leaf = NULL;
return 0;
}
static int ip6erspan_rcv(struct sk_buff *skb,
- struct tnl_ptk_info *tpi)
+ struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
const struct ipv6hdr *ipv6h;
struct erspan_md2 *md2;
struct ip6_tnl *tunnel;
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)skb->data;
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb, len,
htons(ETH_P_TEB),
false, false) < 0)
return PACKET_REJECT;
if (tunnel->parms.collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD)
+ if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
goto out;
}
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
const struct net_device *dev,
__u32 *hash_rnd);
static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
+static bool ndisc_allow_add(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
.pconstructor = pndisc_constructor,
.pdestructor = pndisc_destructor,
.proxy_redo = pndisc_redo,
+ .allow_add = ndisc_allow_add,
.id = "ndisc_cache",
.parms = {
.tbl = &nd_tbl,
ipv6_dev_mc_dec(dev, &maddr);
}
+/* called with rtnl held
+ *
+ * .allow_add callback of the "ndisc_cache" neighbour table: decides whether
+ * a neighbour entry may be added on @dev.
+ *
+ * Returns false, and records "IPv6 is disabled on this device" in @extack,
+ * when @dev has no inet6_dev or its cnf.disable_ipv6 setting is non-zero.
+ * Returns true otherwise.
+ */
+static bool ndisc_allow_add(const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev || idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
+ return false;
+ }
+
+ return true;
+}
+
static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int len)
{
if NF_TABLES_IPV6
-config NFT_CHAIN_ROUTE_IPV6
- tristate "IPv6 nf_tables route chain support"
- help
- This option enables the "route" chain for IPv6 in nf_tables. This
- chain type is used to force packet re-routing after mangling header
- fields such as the source, destination, flowlabel, hop-limit and
- the packet mark.
-
config NFT_REJECT_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- select NF_NAT_MASQUERADE
+ select NETFILTER_XT_TARGET_MASQUERADE
help
- Masquerading is a special case of NAT: all outgoing connections are
- changed to seem to come from a particular interface's address, and
- if the interface goes down, those connections are lost. This is
- only useful for dialup accounts with dynamic IP address (ie. your IP
- address will be different on next dialup).
-
- To compile it as a module, choose M here. If unsure, say N.
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
# targets
-obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
+++ /dev/null
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static unsigned int
-masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
-{
- return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
-}
-
-static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_range2 *range = par->targinfo;
-
- if (range->flags & NF_NAT_RANGE_MAP_IPS)
- return -EINVAL;
- return nf_ct_netns_get(par->net, par->family);
-}
-
-static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
-{
- nf_ct_netns_put(par->net, par->family);
-}
-
-static struct xt_target masquerade_tg6_reg __read_mostly = {
- .name = "MASQUERADE",
- .family = NFPROTO_IPV6,
- .checkentry = masquerade_tg6_checkentry,
- .destroy = masquerade_tg6_destroy,
- .target = masquerade_tg6,
- .targetsize = sizeof(struct nf_nat_range),
- .table = "nat",
- .hooks = 1 << NF_INET_POST_ROUTING,
- .me = THIS_MODULE,
-};
-
-static int __init masquerade_tg6_init(void)
-{
- int err;
-
- err = xt_register_target(&masquerade_tg6_reg);
- if (err)
- return err;
-
- err = nf_nat_masquerade_ipv6_register_notifier();
- if (err)
- xt_unregister_target(&masquerade_tg6_reg);
-
- return err;
-}
-static void __exit masquerade_tg6_exit(void)
-{
- nf_nat_masquerade_ipv6_unregister_notifier();
- xt_unregister_target(&masquerade_tg6_reg);
-}
-
-module_init(masquerade_tg6_init);
-module_exit(masquerade_tg6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: automatic address SNAT");
+++ /dev/null
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/route.h>
-
-static unsigned int nf_route_table_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- unsigned int ret;
- struct nft_pktinfo pkt;
- struct in6_addr saddr, daddr;
- u_int8_t hop_limit;
- u32 mark, flowlabel;
- int err;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- /* save source/dest address, mark, hoplimit, flowlabel, priority */
- memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
- memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
- mark = skb->mark;
- hop_limit = ipv6_hdr(skb)->hop_limit;
-
- /* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u32 *)ipv6_hdr(skb));
-
- ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
- memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
- skb->mark != mark ||
- ipv6_hdr(skb)->hop_limit != hop_limit ||
- flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(state->net, skb);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-
- return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv6 = {
- .name = "route",
- .type = NFT_CHAIN_T_ROUTE,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_OUT),
- .hooks = {
- [NF_INET_LOCAL_OUT] = nf_route_table_hook,
- },
-};
-
-static int __init nft_chain_route_init(void)
-{
- nft_register_chain_type(&nft_chain_route_ipv6);
-
- return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
- nft_unregister_chain_type(&nft_chain_route_ipv6);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
.getname = inet6_getname,
.poll = datagram_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr);
.fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
.fib6_protocol = RTPROT_KERNEL,
.fib6_metric = ~(u32)0,
- .fib6_ref = ATOMIC_INIT(1),
+ .fib6_ref = REFCOUNT_INIT(1),
.fib6_type = RTN_UNREACHABLE,
.fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
};
return false;
}
-struct fib6_info *fib6_multipath_select(const struct net *net,
- struct fib6_info *match,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb,
- int strict)
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict)
{
struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *match = res->f6i;
+
+ if (!match->fib6_nsiblings || have_oif_match)
+ goto out;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
- return match;
+ goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
+ const struct fib6_nh *nh = &sibling->fib6_nh;
int nh_upper_bound;
- nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
+ nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
if (fl6->mp_hash > nh_upper_bound)
continue;
- if (rt6_score_route(sibling, oif, strict) < 0)
+ if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
match = sibling;
break;
}
- return match;
+out:
+ res->f6i = match;
+ res->nh = &match->fib6_nh;
}
/*
* Route lookup. rcu_read_lock() should be held.
*/
-static inline struct fib6_info *rt6_device_match(struct net *net,
- struct fib6_info *rt,
- const struct in6_addr *saddr,
- int oif,
- int flags)
+static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
+ const struct in6_addr *saddr, int oif, int flags)
{
- struct fib6_info *sprt;
+ const struct net_device *dev;
- if (!oif && ipv6_addr_any(saddr) &&
- !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
- return rt;
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
+ return false;
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
- const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;
+ dev = nh->fib_nh_dev;
+ if (oif) {
+ if (dev->ifindex == oif)
+ return true;
+ } else {
+ if (ipv6_chk_addr(net, saddr, dev,
+ flags & RT6_LOOKUP_F_IFACE))
+ return true;
+ }
- if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
- continue;
+ return false;
+}
- if (oif) {
- if (dev->ifindex == oif)
- return sprt;
- } else {
- if (ipv6_chk_addr(net, saddr, dev,
- flags & RT6_LOOKUP_F_IFACE))
- return sprt;
+static void rt6_device_match(struct net *net, struct fib6_result *res,
+ const struct in6_addr *saddr, int oif, int flags)
+{
+ struct fib6_info *f6i = res->f6i;
+ struct fib6_info *spf6i;
+ struct fib6_nh *nh;
+
+ if (!oif && ipv6_addr_any(saddr)) {
+ nh = &f6i->fib6_nh;
+ if (!(nh->fib_nh_flags & RTNH_F_DEAD))
+ goto out;
+ }
+
+ for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
+ nh = &spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ res->f6i = spf6i;
+ goto out;
}
}
- if (oif && flags & RT6_LOOKUP_F_IFACE)
- return net->ipv6.fib6_null_entry;
+ if (oif && flags & RT6_LOOKUP_F_IFACE) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ nh = &res->f6i->fib6_nh;
+ goto out;
+ }
- return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
+ nh = &f6i->fib6_nh;
+ if (nh->fib_nh_flags & RTNH_F_DEAD) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ nh = &res->f6i->fib6_nh;
+ }
+out:
+ res->nh = nh;
+ res->fib6_type = res->f6i->fib6_type;
+ res->fib6_flags = res->f6i->fib6_flags;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
kfree(work);
}
-static void rt6_probe(struct fib6_info *rt)
+static void rt6_probe(struct fib6_nh *fib6_nh)
{
struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (!rt || !rt->fib6_nh.fib_nh_has_gw)
+ /* Nothing to probe unless the next hop has a gateway: everything
+ * below looks up and probes fib_nh_gw6.
+ */
+ if (!fib6_nh->fib_nh_gw_family)
return;
- nh_gw = &rt->fib6_nh.fib_nh_gw6;
- dev = rt->fib6_nh.fib_nh_dev;
+ nh_gw = &fib6_nh->fib_nh_gw6;
+ dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh();
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else if (time_after(jiffies, rt->last_probe +
+ } else if (time_after(jiffies, fib6_nh->last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
if (work) {
- rt->last_probe = jiffies;
+ fib6_nh->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
rcu_read_unlock_bh();
}
#else
-static inline void rt6_probe(struct fib6_info *rt)
+static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
#endif
/*
* Default Router Selection (RFC 2461 6.3.6)
*/
-static inline int rt6_check_dev(struct fib6_info *rt, int oif)
-{
- const struct net_device *dev = rt->fib6_nh.fib_nh_dev;
-
- if (!oif || dev->ifindex == oif)
- return 2;
- return 0;
-}
-
-static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
+static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;
- if (rt->fib6_flags & RTF_NONEXTHOP ||
- !rt->fib6_nh.fib_nh_has_gw)
- return RT6_NUD_SUCCEED;
-
rcu_read_lock_bh();
- neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev,
- &rt->fib6_nh.fib_nh_gw6);
+ neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
+ &fib6_nh->fib_nh_gw6);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
return ret;
}
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict)
{
- int m;
+ int m = 0;
+
+ if (!oif || nh->fib_nh_dev->ifindex == oif)
+ m = 2;
- m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
- m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
+ m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
- if (strict & RT6_LOOKUP_F_REACHABLE) {
- int n = rt6_check_neigh(rt);
+ if ((strict & RT6_LOOKUP_F_REACHABLE) &&
+ !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
+ int n = rt6_check_neigh(nh);
if (n < 0)
return n;
}
return m;
}
-static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
- int *mpri, struct fib6_info *match,
- bool *do_rr)
+static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
+ int oif, int strict, int *mpri, bool *do_rr)
{
- int m;
bool match_do_rr = false;
+ bool rc = false;
+ int m;
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
goto out;
- if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
- rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
+ if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
+ nh->fib_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
- if (fib6_check_expired(rt))
- goto out;
-
- m = rt6_score_route(rt, oif, strict);
+ m = rt6_score_route(nh, fib6_flags, oif, strict);
if (m == RT6_NUD_FAIL_DO_RR) {
match_do_rr = true;
m = 0; /* lowest valid score */
}
if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(rt);
+ rt6_probe(nh);
/* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) {
*do_rr = match_do_rr;
*mpri = m;
- match = rt;
+ rc = true;
}
out:
- return match;
+ return rc;
}
-static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
- struct fib6_info *leaf,
- struct fib6_info *rr_head,
- u32 metric, int oif, int strict,
- bool *do_rr)
+static void __find_rr_leaf(struct fib6_info *f6i_start,
+ struct fib6_info *nomatch, u32 metric,
+ struct fib6_result *res, struct fib6_info **cont,
+ int oif, int strict, bool *do_rr, int *mpri)
{
- struct fib6_info *rt, *match, *cont;
- int mpri = -1;
+ struct fib6_info *f6i;
- match = NULL;
- cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ for (f6i = f6i_start;
+ f6i && f6i != nomatch;
+ f6i = rcu_dereference(f6i->fib6_next)) {
+ struct fib6_nh *nh;
+
+ if (cont && f6i->fib6_metric != metric) {
+ *cont = f6i;
+ return;
}
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
- }
+ if (fib6_check_expired(f6i))
+ continue;
- for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ nh = &f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ res->f6i = f6i;
+ res->nh = nh;
+ res->fib6_flags = f6i->fib6_flags;
+ res->fib6_type = f6i->fib6_type;
}
-
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
+}
+
+static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
+ struct fib6_info *rr_head, int oif, int strict,
+ bool *do_rr, struct fib6_result *res)
+{
+ u32 metric = rr_head->fib6_metric;
+ struct fib6_info *cont = NULL;
+ int mpri = -1;
- if (match || !cont)
- return match;
+ __find_rr_leaf(rr_head, NULL, metric, res, &cont,
+ oif, strict, do_rr, &mpri);
- for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ __find_rr_leaf(leaf, rr_head, metric, res, &cont,
+ oif, strict, do_rr, &mpri);
- return match;
+ if (res->f6i || !cont)
+ return;
+
+ __find_rr_leaf(cont, NULL, metric, res, NULL,
+ oif, strict, do_rr, &mpri);
}
-static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
- int oif, int strict)
+static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
+ struct fib6_result *res, int strict)
{
struct fib6_info *leaf = rcu_dereference(fn->leaf);
- struct fib6_info *match, *rt0;
+ struct fib6_info *rt0;
bool do_rr = false;
int key_plen;
+ /* make sure this function or its helpers sets f6i */
+ res->f6i = NULL;
+
if (!leaf || leaf == net->ipv6.fib6_null_entry)
- return net->ipv6.fib6_null_entry;
+ goto out;
rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
key_plen = rt0->fib6_src.plen;
#endif
if (fn->fn_bit != key_plen)
- return net->ipv6.fib6_null_entry;
-
- match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
- &do_rr);
+ goto out;
+ find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
if (do_rr) {
struct fib6_info *next = rcu_dereference(rt0->fib6_next);
}
}
- return match ? match : net->ipv6.fib6_null_entry;
+out:
+ if (!res->f6i) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ res->nh = &res->f6i->fib6_nh;
+ res->fib6_flags = res->f6i->fib6_flags;
+ res->fib6_type = res->f6i->fib6_type;
+ }
}
-static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
{
- return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
+ return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
+ res->nh->fib_nh_gw_family;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
*/
/* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
+static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
{
- struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+ struct net_device *dev = res->nh->fib_nh_dev;
- if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+ if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
/* for copies of local routes, dst->dev needs to be the
* device if it is a master device, the master device if
* device is enslaved, and the loopback as the default
*/
if (netif_is_l3_slave(dev) &&
- !rt6_need_strict(&rt->fib6_dst.addr))
+ !rt6_need_strict(&res->f6i->fib6_dst.addr))
dev = l3mdev_master_dev_rcu(dev);
else if (!netif_is_l3_master(dev))
dev = dev_net(dev)->loopback_dev;
return flags;
}
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
- rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+ rt->dst.error = ip6_rt_type_to_error(fib6_type);
- switch (ort->fib6_type) {
+ switch (fib6_type) {
case RTN_BLACKHOLE:
rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
}
}
-static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
{
- if (ort->fib6_flags & RTF_REJECT) {
- ip6_rt_init_dst_reject(rt, ort);
+ struct fib6_info *f6i = res->f6i;
+
+ if (res->fib6_flags & RTF_REJECT) {
+ ip6_rt_init_dst_reject(rt, res->fib6_type);
return;
}
rt->dst.error = 0;
rt->dst.output = ip6_output;
- if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
+ if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
rt->dst.input = ip6_input;
- } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
+ } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
} else {
rt->dst.input = ip6_forward;
}
- if (ort->fib6_nh.fib_nh_lws) {
- rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
+ if (res->nh->fib_nh_lws) {
+ rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
lwtunnel_set_redirect(&rt->dst);
}
ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
-/* Caller must already hold reference to @ort */
-static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
+/* Caller must already hold reference to f6i in result */
+static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
{
- struct net_device *dev = fib6_info_nh_dev(ort);
+ const struct fib6_nh *nh = res->nh;
+ const struct net_device *dev = nh->fib_nh_dev;
+ struct fib6_info *f6i = res->f6i;
- ip6_rt_init_dst(rt, ort);
+ ip6_rt_init_dst(rt, res);
- rt->rt6i_dst = ort->fib6_dst;
+ rt->rt6i_dst = f6i->fib6_dst;
rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
- rt->rt6i_flags = ort->fib6_flags;
- if (ort->fib6_nh.fib_nh_has_gw) {
- rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
+ rt->rt6i_flags = res->fib6_flags;
+ if (nh->fib_nh_gw_family) {
+ rt->rt6i_gateway = nh->fib_nh_gw6;
rt->rt6i_flags |= RTF_GATEWAY;
}
- rt6_set_from(rt, ort);
+ rt6_set_from(rt, f6i);
#ifdef CONFIG_IPV6_SUBTREES
- rt->rt6i_src = ort->fib6_src;
+ rt->rt6i_src = f6i->fib6_src;
#endif
}
}
/* called with rcu_lock held */
-static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
+static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
{
- unsigned short flags = fib6_info_dst_flags(rt);
- struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+ struct net_device *dev = res->nh->fib_nh_dev;
+ struct fib6_info *f6i = res->f6i;
+ unsigned short flags;
struct rt6_info *nrt;
- if (!fib6_info_hold_safe(rt))
+ if (!fib6_info_hold_safe(f6i))
goto fallback;
+ flags = fib6_info_dst_flags(f6i);
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
if (!nrt) {
- fib6_info_release(rt);
+ fib6_info_release(f6i);
goto fallback;
}
- ip6_rt_copy_init(nrt, rt);
+ ip6_rt_copy_init(nrt, res);
return nrt;
fallback:
const struct sk_buff *skb,
int flags)
{
- struct fib6_info *f6i;
+ struct fib6_result res = {};
struct fib6_node *fn;
struct rt6_info *rt;
rcu_read_lock();
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- f6i = rcu_dereference(fn->leaf);
- if (!f6i) {
- f6i = net->ipv6.fib6_null_entry;
- } else {
- f6i = rt6_device_match(net, f6i, &fl6->saddr,
- fl6->flowi6_oif, flags);
- if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
- f6i = fib6_multipath_select(net, f6i, fl6,
- fl6->flowi6_oif, skb,
- flags);
- }
- if (f6i == net->ipv6.fib6_null_entry) {
+ res.f6i = rcu_dereference(fn->leaf);
+ if (!res.f6i)
+ res.f6i = net->ipv6.fib6_null_entry;
+ else
+ rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
+ flags);
+
+ if (res.f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
+
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ goto out;
}
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
+ fl6->flowi6_oif != 0, skb, flags);
/* Search through exception table */
- rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
- } else if (f6i == net->ipv6.fib6_null_entry) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
} else {
- rt = ip6_create_rt_rcu(f6i);
+ rt = ip6_create_rt_rcu(&res);
}
+out:
+ trace_fib6_table_lookup(net, &res, table, fl6);
+
rcu_read_unlock();
return rt;
return __ip6_ins_rt(rt, &info, NULL);
}
-static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct fib6_info *f6i = res->f6i;
struct net_device *dev;
struct rt6_info *rt;
* Clone the route.
*/
- if (!fib6_info_hold_safe(ort))
+ if (!fib6_info_hold_safe(f6i))
return NULL;
- dev = ip6_rt_get_dev_rcu(ort);
+ dev = ip6_rt_get_dev_rcu(res);
rt = ip6_dst_alloc(dev_net(dev), dev, 0);
if (!rt) {
- fib6_info_release(ort);
+ fib6_info_release(f6i);
return NULL;
}
- ip6_rt_copy_init(rt, ort);
+ ip6_rt_copy_init(rt, res);
rt->rt6i_flags |= RTF_CACHE;
rt->dst.flags |= DST_HOST;
rt->rt6i_dst.addr = *daddr;
rt->rt6i_dst.plen = 128;
- if (!rt6_is_gw_or_nonexthop(ort)) {
- if (ort->fib6_dst.plen != 128 &&
- ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
+ if (!rt6_is_gw_or_nonexthop(res)) {
+ if (f6i->fib6_dst.plen != 128 &&
+ ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
return rt;
}
-static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
{
- unsigned short flags = fib6_info_dst_flags(rt);
+ struct fib6_info *f6i = res->f6i;
+ unsigned short flags = fib6_info_dst_flags(f6i);
struct net_device *dev;
struct rt6_info *pcpu_rt;
- if (!fib6_info_hold_safe(rt))
+ if (!fib6_info_hold_safe(f6i))
return NULL;
rcu_read_lock();
- dev = ip6_rt_get_dev_rcu(rt);
+ dev = ip6_rt_get_dev_rcu(res);
pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
rcu_read_unlock();
if (!pcpu_rt) {
- fib6_info_release(rt);
+ fib6_info_release(f6i);
return NULL;
}
- ip6_rt_copy_init(pcpu_rt, rt);
+ ip6_rt_copy_init(pcpu_rt, res);
pcpu_rt->rt6i_flags |= RTF_PCPU;
return pcpu_rt;
}
/* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(rt->rt6i_pcpu);
+ p = this_cpu_ptr(res->f6i->rt6i_pcpu);
pcpu_rt = *p;
if (pcpu_rt)
}
static struct rt6_info *rt6_make_pcpu_route(struct net *net,
- struct fib6_info *rt)
+ const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, *prev, **p;
- pcpu_rt = ip6_rt_pcpu_alloc(rt);
+ pcpu_rt = ip6_rt_pcpu_alloc(res);
if (!pcpu_rt) {
dst_hold(&net->ipv6.ip6_null_entry->dst);
return net->ipv6.ip6_null_entry;
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(rt->rt6i_pcpu);
+ p = this_cpu_ptr(res->f6i->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
return NULL;
}
-static unsigned int fib6_mtu(const struct fib6_info *rt)
+static unsigned int fib6_mtu(const struct fib6_result *res)
{
+ const struct fib6_nh *nh = res->nh;
unsigned int mtu;
- if (rt->fib6_pmtu) {
- mtu = rt->fib6_pmtu;
+ if (res->f6i->fib6_pmtu) {
+ mtu = res->f6i->fib6_pmtu;
} else {
- struct net_device *dev = fib6_info_nh_dev(rt);
+ struct net_device *dev = nh->fib_nh_dev;
struct inet6_dev *idev;
rcu_read_lock();
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
- return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
+ return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
static int rt6_insert_exception(struct rt6_info *nrt,
- struct fib6_info *ort)
+ const struct fib6_result *res)
{
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
+ struct fib6_info *f6i = res->f6i;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (ort->exception_bucket_flushed) {
+ if (f6i->exception_bucket_flushed) {
err = -EINVAL;
goto out;
}
- bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
}
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates ort is in subtree
+ /* fib6_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ * both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * a hash of only fib6_dst.
*/
- if (ort->fib6_src.plen)
+ if (f6i->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
- /* rt6_mtu_change() might lower mtu on ort.
+ /* rt6_mtu_change() might lower mtu on f6i.
* Only insert this exception route if its mtu
- * is less than ort's mtu value.
+ * is less than f6i's mtu value.
*/
- if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
+ if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
err = -EINVAL;
goto out;
}
/* Update fn->fn_sernum to invalidate all cached dst */
if (!err) {
- spin_lock_bh(&ort->fib6_table->tb6_lock);
- fib6_update_sernum(net, ort);
- spin_unlock_bh(&ort->fib6_table->tb6_lock);
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_update_sernum(net, f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct rt6_info *res = NULL;
+ struct rt6_info *ret = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket);
+ bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates rt is in subtree
+ /* fib6_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ * both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * a hash of only fib6_dst.
*/
- if (rt->fib6_src.plen)
+ if (res->f6i->fib6_src.plen)
src_key = saddr;
#endif
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
- res = rt6_ex->rt6i;
+ ret = rt6_ex->rt6i;
- return res;
+ return ret;
}
/* Remove the passed in cached rt from the hash table that contains it */
}
/* must be called with rcu lock held */
-struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int strict)
+int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
+ struct flowi6 *fl6, struct fib6_result *res, int strict)
{
struct fib6_node *fn, *saved_fn;
- struct fib6_info *f6i;
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
oif = 0;
redo_rt6_select:
- f6i = rt6_select(net, fn, oif, strict);
- if (f6i == net->ipv6.fib6_null_entry) {
+ rt6_select(net, fn, oif, res, strict);
+ if (res->f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto redo_rt6_select;
}
}
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ trace_fib6_table_lookup(net, res, table, fl6);
- return f6i;
+ return 0;
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6,
const struct sk_buff *skb, int flags)
{
- struct fib6_info *f6i;
+ struct fib6_result res = {};
struct rt6_info *rt;
int strict = 0;
rcu_read_lock();
- f6i = fib6_table_lookup(net, table, oif, fl6, strict);
- if (f6i->fib6_nsiblings)
- f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
-
- if (f6i == net->ipv6.fib6_null_entry) {
+ fib6_table_lookup(net, table, oif, fl6, &res, strict);
+ if (res.f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
dst_hold(&rt->dst);
return rt;
}
+ fib6_select_path(net, &res, fl6, oif, false, skb, strict);
+
/*Search through exception table */
- rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
rcu_read_unlock();
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
- !f6i->fib6_nh.fib_nh_has_gw)) {
+ !res.nh->fib_nh_gw_family)) {
/* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different
*/
struct rt6_info *uncached_rt;
- uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
+ uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
rcu_read_unlock();
struct rt6_info *pcpu_rt;
local_bh_disable();
- pcpu_rt = rt6_get_pcpu_route(f6i);
+ pcpu_rt = rt6_get_pcpu_route(&res);
if (!pcpu_rt)
- pcpu_rt = rt6_make_pcpu_route(net, f6i);
+ pcpu_rt = rt6_make_pcpu_route(net, &res);
local_bh_enable();
rcu_read_unlock();
if (rt6->rt6i_flags & RTF_CACHE)
rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
- struct fib6_info *from;
+ struct fib6_result res = {};
struct rt6_info *nrt6;
rcu_read_lock();
- from = rcu_dereference(rt6->from);
- nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
+ res.f6i = rcu_dereference(rt6->from);
+ if (!res.f6i) {
+ rcu_read_unlock();
+ return;
+ }
+ res.nh = &res.f6i->fib6_nh;
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+
+ nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
- if (rt6_insert_exception(nrt6, from))
+ if (rt6_insert_exception(nrt6, &res))
dst_release_immediate(&nrt6->dst);
}
rcu_read_unlock();
NULL);
}
+/* Check whether the nexthop in @res matches a redirect via gateway @gw.
+ *
+ * Returns false when the nexthop is flagged RTNH_F_DEAD, has no gateway
+ * family set, or its device ifindex differs from fl6->flowi6_oif.
+ * Returns true when @gw equals the nexthop gateway (*ret is left
+ * untouched), or when the cached exception route found for fl6's
+ * daddr/saddr has @gw as its gateway (that route is stored in *ret).
+ * In every other case false is returned and *ret is not written.
+ */
+static bool ip6_redirect_nh_match(const struct fib6_result *res,
+ struct flowi6 *fl6,
+ const struct in6_addr *gw,
+ struct rt6_info **ret)
+{
+ const struct fib6_nh *nh = res->nh;
+
+ if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
+ fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
+ return false;
+
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
+ *ret = rt_cache;
+ return true;
+ }
+ return false;
+ }
+ return true;
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *ret = NULL, *rt_cache;
+ struct rt6_info *ret = NULL;
+ struct fib6_result res = {};
struct fib6_info *rt;
struct fib6_node *fn;
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
- continue;
+ res.f6i = rt;
+ res.nh = &rt->fib6_nh;
+
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (!rt->fib6_nh.fib_nh_has_gw)
- continue;
- if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
- continue;
- /* rt_cache's gateway might be different from its 'parent'
- * in the case of an ip redirect.
- * So we keep searching in the exception table if the gateway
- * is different.
- */
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
- rt_cache = rt6_find_cached_rt(rt,
- &fl6->daddr,
- &fl6->saddr);
- if (rt_cache &&
- ipv6_addr_equal(&rdfl->gateway,
- &rt_cache->rt6i_gateway)) {
- ret = rt_cache;
- break;
- }
- continue;
- }
- break;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
+ goto out;
}
if (!rt)
goto restart;
}
+ res.f6i = rt;
+ res.nh = &rt->fib6_nh;
out:
- if (ret)
+ if (ret) {
ip6_hold_safe(net, &ret);
- else
- ret = ip6_create_rt_rcu(rt);
+ } else {
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+ ret = ip6_create_rt_rcu(&res);
+ }
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6);
+ trace_fib6_table_lookup(net, &res, table, fl6);
return ret;
};
* based on ip6_dst_mtu_forward and exception logic of
* rt6_find_cached_rt; called with rcu_read_lock
*/
-u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr)
+u32 ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
+ const struct fib6_nh *nh = res->nh;
+ struct fib6_info *f6i = res->f6i;
+ const struct in6_addr *src_key;
struct rt6_exception *rt6_ex;
- struct in6_addr *src_key;
struct inet6_dev *idev;
u32 mtu = 0;
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
- struct net_device *dev = fib6_info_nh_dev(f6i);
+ struct net_device *dev = nh->fib_nh_dev;
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
- return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
+ return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
goto out;
fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
- fib6_nh->fib_nh_has_gw = 1;
+ fib6_nh->fib_nh_gw_family = AF_INET6;
}
err = -ENODEV;
struct fib6_nh *nh;
if (cfg->fc_flags & RTF_CACHE) {
+ struct fib6_result res = {
+ .f6i = rt,
+ };
int rc;
- rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ rt_cache = rt6_find_cached_rt(&res,
+ &cfg->fc_dst,
&cfg->fc_src);
if (rt_cache) {
rc = ip6_del_cached_rt(rt_cache, cfg);
{
struct netevent_redirect netevent;
struct rt6_info *rt, *nrt = NULL;
+ struct fib6_result res = {};
struct ndisc_options ndopts;
struct inet6_dev *in6_dev;
struct neighbour *neigh;
- struct fib6_info *from;
struct rd_msg *msg;
int optlen, on_link;
u8 *lladdr;
NDISC_REDIRECT, &ndopts);
rcu_read_lock();
- from = rcu_dereference(rt->from);
+ res.f6i = rcu_dereference(rt->from);
/* This fib6_info_hold() is safe here because we hold reference to rt
* and rt already holds reference to fib6_info.
*/
- fib6_info_hold(from);
+ fib6_info_hold(res.f6i);
rcu_read_unlock();
- nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
+ res.nh = &res.f6i->fib6_nh;
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+ nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
if (!nrt)
goto out;
* a cached route because rt6_insert_exception() will
* takes care of it
*/
- if (rt6_insert_exception(nrt, from)) {
+ if (rt6_insert_exception(nrt, &res)) {
dst_release_immediate(&nrt->dst);
goto out;
}
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
- fib6_info_release(from);
+ fib6_info_release(res.f6i);
neigh_release(neigh);
}
if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
- !rt->fib6_nh.fib_nh_has_gw)
+ !rt->fib6_nh.fib_nh_gw_family)
continue;
if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
continue;
struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- rt->fib6_nh.fib_nh_has_gw &&
+ rt->fib6_nh.fib_nh_gw_family &&
ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
return -1;
}
struct arg_netdev_event {
const struct net_device *dev;
union {
- unsigned int nh_flags;
+ unsigned char nh_flags;
unsigned long event;
};
};
return 0;
}
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct arg_netdev_event arg = {
.dev = dev,
static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
const struct net_device *dev,
- unsigned int nh_flags)
+ unsigned char nh_flags)
{
struct fib6_info *iter;
nla_nest_end(skb, mp);
} else {
+ unsigned char nh_flags = 0;
+
if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
- &rtm->rtm_flags, false) < 0)
+ &nh_flags, false) < 0)
goto nla_put_failure;
+
+ rtm->rtm_flags |= nh_flags;
}
if (rt6_flags & RTF_EXPIRES) {
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
- int peeked, peeking, off;
- int err;
+ int off, err, peeking = flags & MSG_PEEK;
int is_udplite = IS_UDPLITE(sk);
struct udp_mib __percpu *mib;
bool checksum_valid = false;
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
- peeking = flags & MSG_PEEK;
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+ skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
if (!skb)
return err;
goto csum_copy_err;
}
if (unlikely(err)) {
- if (!peeked) {
+ if (!peeking) {
atomic_inc(&sk->sk_drops);
SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
return err;
}
- if (!peeked)
+ if (!peeking)
SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
sock_recv_ts_and_drops(msg, sk, skb);
static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
/* The following checks are replicated from __ip6_datagram_connect()
* and intended to prevent BPF program called below from accessing
* bytes that are out of the bound specified by user in addr_len.
.getname = l2tp_ip_getname,
.poll = datagram_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getname = l2tp_ip6_getname,
.poll = datagram_poll,
.ioctl = inet6_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
{
struct pppol2tp_ioc_stats stats;
struct l2tp_session *session;
- int val;
switch (cmd) {
case PPPIOCGMRU:
if (!session->session_id && !session->peer_session_id)
return -ENOSYS;
- if (get_user(val, (int __user *)arg))
+ if (!access_ok((int __user *)arg, sizeof(int)))
return -EFAULT;
break;
struct llc_sap *sap;
int rc = -EINVAL;
- dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
-
lock_sock(sk);
if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
goto out;
rc = -EAFNOSUPPORT;
if (unlikely(addr->sllc_family != AF_LLC))
goto out;
+ dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
+ if (local->in_reconfig)
+ return;
+
if (!check_sdata_in_driver(sdata))
return;
* The driver doesn't know anything about VLAN interfaces.
* Hence, don't send GTKs for VLAN interfaces to the driver.
*/
- if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE))
+ if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ ret = 1;
goto out_unsupported;
+ }
}
ret = drv_set_key(key->local, SET_KEY, sdata,
/* all of these we can do in software - if driver can */
if (ret == 1)
return 0;
- if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) {
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
- return 0;
+ if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
return -EINVAL;
- }
return 0;
default:
return -EINVAL;
static u32 mesh_table_hash(const void *addr, u32 len, u32 seed)
{
/* Use last four bytes of hw addr as hash index */
- return jhash_1word(*(u32 *)(addr+2), seed);
+ return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed);
}
static const struct rhashtable_params mesh_rht_params = {
return;
for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
- if (txq_has_queue(sta->sta.txq[tid]))
+ struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi = to_txq_info(txq);
+
+ spin_lock(&local->active_txq_lock[txq->ac]);
+ if (!list_empty(&txqi->schedule_order))
+ list_del_init(&txqi->schedule_order);
+ spin_unlock(&local->active_txq_lock[txq->ac]);
+
+ if (txq_has_queue(txq))
set_bit(tid, &sta->txq_buffered_tids);
else
clear_bit(tid, &sta->txq_buffered_tids);
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright (C) 2019 Intel Corporation
+ */
+
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 100
+#define MAX_MSG_LEN 120
DECLARE_EVENT_CLASS(mac80211_msg_event,
TP_PROTO(struct va_format *vaf),
u8 max_subframes = sta->sta.max_amsdu_subframes;
int max_frags = local->hw.max_tx_fragments;
int max_amsdu_len = sta->sta.max_amsdu_len;
+ int orig_truesize;
u32 flow_idx;
__be16 len;
void *data;
if (!head || skb_is_gso(head))
goto out;
+ orig_truesize = head->truesize;
orig_len = head->len;
if (skb->len + head->len > max_amsdu_len)
*frag_tail = skb;
out_recalc:
+ fq->memory_usage += head->truesize - orig_truesize;
if (head->len != orig_len) {
flow->backlog += head->len - orig_len;
tin->backlog_bytes += head->len - orig_len;
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_txq *ret = NULL;
struct txq_info *txqi = NULL;
- lockdep_assert_held(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->active_txq_lock[ac]);
begin:
txqi = list_first_entry_or_null(&local->active_txqs[ac],
struct txq_info,
schedule_order);
if (!txqi)
- return NULL;
+ goto out;
if (txqi->txq.sta) {
struct sta_info *sta = container_of(txqi->txq.sta,
if (txqi->schedule_round == local->schedule_round[ac])
- return NULL;
+ goto out;
list_del_init(&txqi->schedule_order);
txqi->schedule_round = local->schedule_round[ac];
- return &txqi->txq;
+ ret = &txqi->txq;
+
+out:
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ return ret;
}
EXPORT_SYMBOL(ieee80211_next_txq);
-void ieee80211_return_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool force)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
- lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+ spin_lock_bh(&local->active_txq_lock[txq->ac]);
if (list_empty(&txqi->schedule_order) &&
- (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
+ (force || !skb_queue_empty(&txqi->frags) ||
+ txqi->tin.backlog_packets)) {
/* If airtime accounting is active, always enqueue STAs at the
* head of the list to ensure that they only get moved to the
* back by the airtime DRR scheduler once they have a negative
list_add_tail(&txqi->schedule_order,
&local->active_txqs[txq->ac]);
}
-}
-EXPORT_SYMBOL(ieee80211_return_txq);
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- spin_lock_bh(&local->active_txq_lock[txq->ac]);
- ieee80211_return_txq(hw, txq);
spin_unlock_bh(&local->active_txq_lock[txq->ac]);
}
-EXPORT_SYMBOL(ieee80211_schedule_txq);
+EXPORT_SYMBOL(__ieee80211_schedule_txq);
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
struct sta_info *sta;
u8 ac = txq->ac;
- lockdep_assert_held(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->active_txq_lock[ac]);
if (!txqi->txq.sta)
goto out;
sta->airtime[ac].deficit += sta->airtime_weight;
list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
return false;
out:
if (!list_empty(&txqi->schedule_order))
list_del_init(&txqi->schedule_order);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
return true;
}
EXPORT_SYMBOL(ieee80211_txq_may_transmit);
void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
- __acquires(txq_lock)
{
struct ieee80211_local *local = hw_to_local(hw);
spin_lock_bh(&local->active_txq_lock[ac]);
local->schedule_round[ac]++;
-}
-EXPORT_SYMBOL(ieee80211_txq_schedule_start);
-
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
- __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
spin_unlock_bh(&local->active_txq_lock[ac]);
}
-EXPORT_SYMBOL(ieee80211_txq_schedule_end);
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
#include <net/ipv6.h>
#endif
#include <net/ipv6_stubs.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#include "internal.h"
/* max memory we will use for mpls_route */
mpls_stats_inc_outucastpkts(out_dev, skb);
- if (rt)
- err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
- skb);
- else if (rt6) {
+ if (rt) {
+ if (rt->rt_gw_family == AF_INET)
+ err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4,
+ skb);
+ else if (rt->rt_gw_family == AF_INET6)
+ err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6,
+ skb);
+ } else if (rt6) {
if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
/* 6PE (RFC 4798) */
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/ncsi.h>
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
/* Increase mac address by 1 for BMC's address */
- saddr.sa_data[ETH_ALEN - 1]++;
+ eth_addr_inc((u8 *)saddr.sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ return -ENXIO;
+
ret = ops->ndo_set_mac_address(ndev, &saddr);
if (ret < 0)
netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
forms of full Network Address Port Translation. This can be
controlled by iptables, ip6tables or nft.
-config NF_NAT_NEEDED
- bool
- depends on NF_NAT
- default y
-
config NF_NAT_AMANDA
tristate
depends on NF_CONNTRACK && NF_NAT
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ depends on NF_NAT
+ default m if NETFILTER_ADVANCED=n
+ select NF_NAT_MASQUERADE
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_TEE
tristate '"TEE" - packet cloning to alternate destination'
depends on NETFILTER_ADVANCED
nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
- nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
+ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
+ nft_chain_route.o
nf_tables_set-objs := nf_tables_set_core.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_MASQUERADE) += xt_MASQUERADE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
+#include <net/netfilter/nf_queue.h>
#include <net/sock.h>
#include "nf_internals.h"
if (!cp) {
int v;
- if (!sysctl_schedule_icmp(ipvs))
+ if (ipip || !sysctl_schedule_icmp(ipvs))
return NF_ACCEPT;
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE;
+ /* set the tunnel info */
+ dest->tun_type = udest->tun_type;
+ dest->tun_port = udest->tun_port;
+
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
return -ERANGE;
}
+ if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if (udest->tun_port == 0) {
+ pr_err("%s(): tunnel port is zero\n", __func__);
+ return -EINVAL;
+ }
+ }
+
ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
return -ERANGE;
}
+ if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if (udest->tun_port == 0) {
+ pr_err("%s(): tunnel port is zero\n", __func__);
+ return -EINVAL;
+ }
+ }
+
ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
udest->af = AF_INET;
+ udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
}
static int
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
+ [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
IP_VS_CONN_F_FWD_MASK)) ||
nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
atomic_read(&dest->weight)) ||
+ nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
+ dest->tun_type) ||
+ nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
+ dest->tun_port) ||
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
- *nla_l_thresh;
+ *nla_l_thresh, *nla_tun_type, *nla_tun_port;
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
+ nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
+ nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL;
udest->weight = nla_get_u32(nla_weight);
udest->u_threshold = nla_get_u32(nla_u_thresh);
udest->l_threshold = nla_get_u32(nla_l_thresh);
+
+ if (nla_tun_type)
+ udest->tun_type = nla_get_u8(nla_tun_type);
+
+ if (nla_tun_port)
+ udest->tun_port = nla_get_be16(nla_tun_port);
}
return 0;
#include <linux/slab.h>
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
+#include <net/gue.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
mtu = dst_mtu(&rt->dst);
} else {
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ if (!dest)
+ goto err_put;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
mtu = dst_mtu(&rt->dst);
else {
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
+ if (!dest)
+ goto err_put;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
}
}
+/* Prepend a GUE header followed by a UDP header to @skb.
+ *
+ * The GUE header is written with control, version, hlen and flags all
+ * zero and proto_ctype set to *next_protocol. The UDP header takes its
+ * destination port from cp->dest->tun_port, its source port from
+ * udp_flow_src_port(), a zero checksum, and a length equal to skb->len
+ * after both headers have been pushed. On return *next_protocol is
+ * IPPROTO_UDP. Always returns 0.
+ *
+ * NOTE(review): no headroom check is done here; presumably callers
+ * reserve sizeof(struct udphdr) + sizeof(struct guehdr) beforehand -
+ * confirm against every call site.
+ */
+static int
+ipvs_gue_encap(struct net *net, struct sk_buff *skb,
+ struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+ __be16 dport;
+ __be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
+ struct udphdr *udph; /* Our new UDP header */
+ struct guehdr *gueh; /* Our new GUE header */
+
+ skb_push(skb, sizeof(struct guehdr));
+
+ gueh = (struct guehdr *)skb->data;
+
+ gueh->control = 0;
+ gueh->version = 0;
+ gueh->hlen = 0;
+ gueh->flags = 0;
+ gueh->proto_ctype = *next_protocol;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ udph = udp_hdr(skb);
+
+ dport = cp->dest->tun_port;
+ udph->dest = dport;
+ udph->source = sport;
+ udph->len = htons(skb->len);
+ udph->check = 0;
+
+ *next_protocol = IPPROTO_UDP;
+
+ return 0;
+}
+
/*
* IP Tunneling transmitter
*
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
+ int tun_type, gso_type;
EnterFunction(10);
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
+ tun_type = cp->dest->tun_type;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+
/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
if (IS_ERR(skb))
goto tx_error;
- if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
+ gso_type = __tun_gso_type_mask(AF_INET, cp->af);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+
+ if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
skb->transport_header = skb->network_header;
+ skb_set_inner_ipproto(skb, next_protocol);
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ ipvs_gue_encap(net, skb, cp, &next_protocol);
+
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
+ struct netns_ipvs *ipvs = cp->ipvs;
+ struct net *net = ipvs->net;
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
+ int tun_type, gso_type;
EnterFunction(10);
- local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+ tun_type = cp->dest->tun_type;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
&next_protocol, &payload_len,
&dsfield, &ttl, NULL);
if (IS_ERR(skb))
goto tx_error;
- if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
+ gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+
+ if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
skb->transport_header = skb->network_header;
+ skb_set_inner_ipproto(skb, next_protocol);
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ ipvs_gue_encap(net, skb, cp, &next_protocol);
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip6_local_out(cp->ipvs->net, skb->sk, skb);
+ ip6_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+/* Generate an almost-unique pseudo-id for a given conntrack.
+ *
+ * intentionally doesn't re-use any of the seeds used for hash
+ * table location, we assume id gets exposed to userspace.
+ *
+ * Following nf_conn items do not change throughout lifetime
+ * of the nf_conn after it has been committed to main hash table:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->ext address
+ * 3. nf_conn->master address (normally NULL)
+ * 4. tuple
+ * 5. the associated net namespace
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+	static __read_mostly siphash_key_t ct_id_seed;
+	unsigned long ct_addr, master_mix, ext_addr, tuple_digest;
+
+	/* id-only key, seeded through net_get_random_once() */
+	net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+	/* collect the four inputs as machine words */
+	ct_addr = (unsigned long)ct;
+	master_mix = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+	ext_addr = (unsigned long)ct->ext;
+	tuple_digest = (unsigned long)siphash(&ct->tuplehash,
+					      sizeof(ct->tuplehash),
+					      &ct_id_seed);
+
+	/* mix them with the word-sized siphash variant */
+#ifdef CONFIG_64BIT
+	return siphash_4u64((u64)ct_addr, (u64)master_mix, (u64)ext_addr,
+			    (u64)tuple_digest, &ct_id_seed);
+#else
+	return siphash_4u32((u32)ct_addr, (u32)master_mix, (u32)ext_addr,
+			    (u32)tuple_digest, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
+
static void
clean_from_lists(struct nf_conn *ct)
{
/* set conntrack timestamp, if enabled. */
tstamp = nf_conn_tstamp_find(ct);
- if (tstamp) {
- if (skb->tstamp == 0)
- __net_timestamp(skb);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
- tstamp->start = ktime_to_ns(skb->tstamp);
- }
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
* guarantee that no other CPU can find the conntrack before the above
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
+ ct->timeout = 0;
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
exp->tuple.dst.u.all = *dst;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
#endif
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/siphash.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#endif
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
{
- if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
+ __be32 id = (__force __be32)nf_ct_get_id(ct);
+
+ if (nla_put_be32(skb, CTA_ID, id))
goto nla_put_failure;
return 0;
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
#endif
}
if (cda[CTA_ID]) {
- u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
- if (id != (u32)(unsigned long)ct) {
+ __be32 id = nla_get_be32(cda[CTA_ID]);
+
+ if (id != (__force __be32)nf_ct_get_id(ct)) {
nf_ct_put(ct);
return -ENOENT;
}
return -EOPNOTSUPP;
}
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
static int
ctnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
static int
ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
{
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
int ret;
if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
#endif
static const union nf_inet_addr any_addr;
+/* Derive the 32-bit id reported for an expectation.
+ *
+ * The value is a keyed siphash over the expectation's own address, its
+ * helper and master pointers and a digest of its tuple, so the raw kernel
+ * pointer is not what ends up in the id.
+ */
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+	static __read_mostly siphash_key_t exp_id_seed;
+	unsigned long exp_addr, helper_addr, master_addr, tuple_digest;
+
+	/* id-only key, seeded through net_get_random_once() */
+	net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+	exp_addr = (unsigned long)exp;
+	helper_addr = (unsigned long)exp->helper;
+	master_addr = (unsigned long)exp->master;
+	tuple_digest = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple),
+					      &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+	return (__force __be32)siphash_4u64((u64)exp_addr, (u64)helper_addr,
+					    (u64)master_addr,
+					    (u64)tuple_digest, &exp_id_seed);
+#else
+	return (__force __be32)siphash_4u32((u32)exp_addr, (u32)helper_addr,
+					    (u32)master_addr,
+					    (u32)tuple_digest, &exp_id_seed);
+#endif
+}
+
static int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
struct nf_conn *master = exp->master;
long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
struct nf_conn_help *help;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
struct nf_conntrack_tuple nat_tuple = {};
#endif
CTA_EXPECT_MASTER) < 0)
goto nla_put_failure;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) ||
exp->saved_proto.all) {
nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED);
}
#endif
if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
- nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
+ nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
- if (ntohl(id) != (u32)(unsigned long)exp) {
+
+ if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
return -ENOENT;
}
struct nf_conntrack_expect *exp,
u_int8_t u3)
{
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *tb[CTA_EXPECT_NAT_MAX+1];
struct nf_conntrack_tuple nat_tuple = {};
int err;
struct va_format vaf;
va_list args;
- if (net->ct.sysctl_log_invalid != protonum ||
+ if (net->ct.sysctl_log_invalid != protonum &&
net->ct.sysctl_log_invalid != IPPROTO_RAW)
return;
return NF_ACCEPT;
}
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
- const struct nf_hook_state *state)
+/* Check inner header is related to any of the existing connections */
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state,
+ u8 l4proto, union nf_inet_addr *outer_daddr)
{
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_zone *zone;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
+ union nf_inet_addr *ct_daddr;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
WARN_ON(skb_nfct(skb));
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
- if (!nf_ct_get_tuplepr(skb,
- skb_network_offset(skb) + ip_hdrlen(skb)
- + sizeof(struct icmphdr),
- PF_INET, state->net, &origtuple)) {
- pr_debug("icmp_error_message: failed to get tuple\n");
+ if (!nf_ct_get_tuplepr(skb, dataoff,
+ state->pf, state->net, &origtuple))
return -NF_ACCEPT;
- }
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
- pr_debug("icmp_error_message: no match\n");
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple))
return -NF_ACCEPT;
- }
-
- ctinfo = IP_CT_RELATED;
h = nf_conntrack_find_get(state->net, zone, &innertuple);
- if (!h) {
- pr_debug("icmp_error_message: no match\n");
+ if (!h)
+ return -NF_ACCEPT;
+
+ /* Consider: A -> T (=This machine) -> B
+ * Conntrack entry will look like this:
+ * Original: A->B
+ * Reply: B->T (SNAT case) OR A
+ *
+ * When this function runs, we got packet that looks like this:
+ * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
+ *
+ * Above nf_conntrack_find_get() makes lookup based on inner_hdr,
+ * so we should expect that destination of the found connection
+ * matches outer header destination address.
+ *
+ * In above example, we can consider these two cases:
+ * 1. Error coming in reply direction from B or M (middle box) to
+ * T (SNAT case) or A.
+ * Inner saddr will be B, dst will be T or A.
+ * The found conntrack will be reply tuple (B->T/A).
+ * 2. Error coming in original direction from A or M to B.
+ * Inner saddr will be A, inner daddr will be B.
+ * The found conntrack will be original tuple (A->B).
+ *
+ * In both cases, conntrack[dir].dst == inner.dst.
+ *
+ * A bogus packet could look like this:
+ * Inner: B->T
+ * Outer: B->X (other machine reachable by T).
+ *
+ * In this case, lookup yields connection A->B and will
+ * set packet from B->X as *RELATED*, even though no connection
+ * from X was ever seen.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ dir = NF_CT_DIRECTION(h);
+ ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
+ if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
+ if (state->pf == AF_INET) {
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ l4proto,
+ "outer daddr %pI4 != inner %pI4",
+ &outer_daddr->ip, &ct_daddr->ip);
+ } else if (state->pf == AF_INET6) {
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ l4proto,
+ "outer daddr %pI6 != inner %pI6",
+ &outer_daddr->ip6, &ct_daddr->ip6);
+ }
+ nf_ct_put(ct);
return -NF_ACCEPT;
}
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ ctinfo = IP_CT_RELATED;
+ if (dir == IP_CT_DIR_REPLY)
ctinfo += IP_CT_IS_REPLY;
/* Update skb to refer to this connection */
- nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+ nf_ct_set(skb, ct, ctinfo);
return NF_ACCEPT;
}
struct sk_buff *skb, unsigned int dataoff,
const struct nf_hook_state *state)
{
+ union nf_inet_addr outer_daddr;
const struct icmphdr *icmph;
struct icmphdr _ih;
/* Not enough header? */
- icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+ icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmph == NULL) {
icmp_error_log(skb, state, "short packet");
return -NF_ACCEPT;
icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
- return icmp_error_message(tmpl, skb, state);
+ memset(&outer_daddr, 0, sizeof(outer_daddr));
+ outer_daddr.ip = ip_hdr(skb)->daddr;
+
+ dataoff += sizeof(*icmph);
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMP, &outer_daddr);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
return NF_ACCEPT;
}
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb,
- unsigned int icmp6off)
-{
- struct nf_conntrack_tuple intuple, origtuple;
- const struct nf_conntrack_tuple_hash *h;
- enum ip_conntrack_info ctinfo;
- struct nf_conntrack_zone tmp;
-
- WARN_ON(skb_nfct(skb));
-
- /* Are they talking about one of our connections? */
- if (!nf_ct_get_tuplepr(skb,
- skb_network_offset(skb)
- + sizeof(struct ipv6hdr)
- + sizeof(struct icmp6hdr),
- PF_INET6, net, &origtuple)) {
- pr_debug("icmpv6_error: Can't get tuple\n");
- return -NF_ACCEPT;
- }
-
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&intuple, &origtuple)) {
- pr_debug("icmpv6_error: Can't invert tuple\n");
- return -NF_ACCEPT;
- }
-
- ctinfo = IP_CT_RELATED;
-
- h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
- &intuple);
- if (!h) {
- pr_debug("icmpv6_error: no match\n");
- return -NF_ACCEPT;
- } else {
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
- ctinfo += IP_CT_IS_REPLY;
- }
-
- /* Update skb to refer to this connection */
- nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
- return NF_ACCEPT;
-}
static void icmpv6_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int dataoff,
const struct nf_hook_state *state)
{
+ union nf_inet_addr outer_daddr;
const struct icmp6hdr *icmp6h;
struct icmp6hdr _ih;
int type;
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
- return icmpv6_error_message(state->net, tmpl, skb, dataoff);
+ memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+ sizeof(outer_daddr.ip6));
+ dataoff += sizeof(*icmp6h);
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMPV6, &outer_daddr);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
exp->class != class)
break;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
if (!direct_rtp &&
(!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) ||
exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
if (tuplehash == NULL)
return NF_ACCEPT;
- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
- if (!outdev)
- return NF_ACCEPT;
-
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ outdev = rt->dst.dev;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
if (tuplehash == NULL)
return NF_ACCEPT;
- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
- if (!outdev)
- return NF_ACCEPT;
-
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ outdev = rt->dst.dev;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
#include <linux/netdevice.h>
/* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- const struct nf_hook_entries *entries, unsigned int index,
- unsigned int verdict);
void nf_queue_nf_hook_drop(struct net *net);
/* nf_log.c */
case IPPROTO_ICMPV6:
/* id is same for either direction... */
keyptr = &tuple->src.u.icmp.id;
- min = range->min_proto.icmp.id;
- range_size = ntohs(range->max_proto.icmp.id) -
- ntohs(range->min_proto.icmp.id) + 1;
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ min = 0;
+ range_size = 65536;
+ } else {
+ min = ntohs(range->min_proto.icmp.id);
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+ }
goto find_free_id;
#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
case IPPROTO_GRE:
.expectfn = nf_nat_follow_master,
};
-int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
+int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count)
{
struct nat_net *nat_net = net_generic(net, nat_net_id);
struct nf_hook_ops *nat_ops;
int i, ret;
- if (WARN_ON_ONCE(ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
+ if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
return -EINVAL;
- nat_proto_net = &nat_net->nat_proto_net[ops->pf];
+ nat_proto_net = &nat_net->nat_proto_net[pf];
for (i = 0; i < ops_count; i++) {
- if (WARN_ON(orig_nat_ops[i].pf != ops->pf))
- return -EINVAL;
if (orig_nat_ops[i].hooknum == hooknum) {
hooknum = i;
break;
return ret;
}
-void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
- unsigned int ops_count)
+void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
+ unsigned int ops_count)
{
struct nat_net *nat_net = net_generic(net, nat_net_id);
struct nf_nat_hooks_net *nat_proto_net;
int hooknum = ops->hooknum;
int i;
- if (ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net))
+ if (pf >= ARRAY_SIZE(nat_net->nat_proto_net))
return;
- nat_proto_net = &nat_net->nat_proto_net[ops->pf];
+ nat_proto_net = &nat_net->nat_proto_net[pf];
mutex_lock(&nf_nat_proto_mutex);
if (WARN_ON(nat_proto_net->users == 0))
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+#include <net/netfilter/nf_nat_masquerade.h>
static DEFINE_MUTEX(masq_mutex);
-static unsigned int masq_refcnt4 __read_mostly;
-static unsigned int masq_refcnt6 __read_mostly;
+static unsigned int masq_refcnt __read_mostly;
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
.notifier_call = masq_inet_event,
};
-int nf_nat_masquerade_ipv4_register_notifier(void)
-{
- int ret = 0;
-
- mutex_lock(&masq_mutex);
- if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
- ret = -EOVERFLOW;
- goto out_unlock;
- }
-
- /* check if the notifier was already set */
- if (++masq_refcnt4 > 1)
- goto out_unlock;
-
- /* Register for device down reports */
- ret = register_netdevice_notifier(&masq_dev_notifier);
- if (ret)
- goto err_dec;
- /* Register IP address change reports */
- ret = register_inetaddr_notifier(&masq_inet_notifier);
- if (ret)
- goto err_unregister;
-
- mutex_unlock(&masq_mutex);
- return ret;
-
-err_unregister:
- unregister_netdevice_notifier(&masq_dev_notifier);
-err_dec:
- masq_refcnt4--;
-out_unlock:
- mutex_unlock(&masq_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
-
-void nf_nat_masquerade_ipv4_unregister_notifier(void)
-{
- mutex_lock(&masq_mutex);
- /* check if the notifier still has clients */
- if (--masq_refcnt4 > 0)
- goto out_unlock;
-
- unregister_netdevice_notifier(&masq_dev_notifier);
- unregister_inetaddr_notifier(&masq_inet_notifier);
-out_unlock:
- mutex_unlock(&masq_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
-
#if IS_ENABLED(CONFIG_IPV6)
static atomic_t v6_worker_count __read_mostly;
.notifier_call = masq_inet6_event,
};
-int nf_nat_masquerade_ipv6_register_notifier(void)
+/* Hook masq_inet6_notifier into the inet6 address notifier chain. */
+static int nf_nat_masquerade_ipv6_register_notifier(void)
+{
+	int err;
+
+	err = register_inet6addr_notifier(&masq_inet6_notifier);
+	return err;
+}
+#else
+static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
+#endif
+
+int nf_nat_masquerade_inet_register_notifiers(void)
{
int ret = 0;
mutex_lock(&masq_mutex);
- if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
+ if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
ret = -EOVERFLOW;
goto out_unlock;
}
- /* check if the notifier is already set */
- if (++masq_refcnt6 > 1)
+ /* check if the notifier was already set */
+ if (++masq_refcnt > 1)
goto out_unlock;
- ret = register_inet6addr_notifier(&masq_inet6_notifier);
+ /* Register for device down reports */
+ ret = register_netdevice_notifier(&masq_dev_notifier);
if (ret)
goto err_dec;
+ /* Register IP address change reports */
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret)
+ goto err_unreg_inet;
mutex_unlock(&masq_mutex);
return ret;
+err_unreg_inet:
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
- masq_refcnt6--;
+ masq_refcnt--;
out_unlock:
mutex_unlock(&masq_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
-void nf_nat_masquerade_ipv6_unregister_notifier(void)
+void nf_nat_masquerade_inet_unregister_notifiers(void)
{
mutex_lock(&masq_mutex);
- /* check if the notifier still has clients */
- if (--masq_refcnt6 > 0)
+ /* check if the notifiers still have clients */
+ if (--masq_refcnt > 0)
goto out_unlock;
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&masq_inet6_notifier);
+#endif
out_unlock:
mutex_unlock(&masq_mutex);
}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
-#endif
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
return ret;
}
-static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
+const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv4_in,
int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
{
- return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
+ ARRAY_SIZE(nf_nat_ipv4_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
{
- nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
return ret;
}
-static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
-{
-#ifdef CONFIG_IPV6_MODULE
- const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
- if (!v6_ops)
- return -EHOSTUNREACH;
-
- return v6_ops->route_me_harder(net, skb);
-#else
- return ip6_route_me_harder(net, skb);
-#endif
-}
-
static unsigned int
nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
- err = nat_route_me_harder(state->net, skb);
+ err = nf_ip6_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
return ret;
}
-static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
+const struct nf_hook_ops nf_nat_ipv6_ops[] = {
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv6_in,
int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
{
- return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops,
+ return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
ARRAY_SIZE(nf_nat_ipv6_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
{
- nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
#endif /* CONFIG_IPV6 */
+
+#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
+/* Register an NFPROTO_INET NAT hook for both address families.
+ *
+ * The IPv6 slot is registered first, then the IPv4 one.  If the IPv4
+ * registration fails, the IPv6 registration is rolled back.
+ *
+ * Returns 0 on success, -EINVAL when @ops is not an NFPROTO_INET hook,
+ * or the error returned by nf_nat_register_fn().
+ */
+int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	int ret;
+
+	if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
+		return -EINVAL;
+
+	ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
+				 ARRAY_SIZE(nf_nat_ipv6_ops));
+	if (ret)
+		return ret;
+
+	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
+				 ARRAY_SIZE(nf_nat_ipv4_ops));
+	if (ret) {
+		/* ops->pf is NFPROTO_INET at this point, and
+		 * nf_nat_ipv6_unregister_fn() forwards ops->pf as the family,
+		 * so it would not undo the NFPROTO_IPV6 registration made
+		 * above.  Name the family explicitly instead.
+		 */
+		nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
+				     ARRAY_SIZE(nf_nat_ipv6_ops));
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
+
+/* Undo nf_nat_inet_register_fn(): release the IPv4 slot, then the IPv6 one. */
+void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	const unsigned int v4_count = ARRAY_SIZE(nf_nat_ipv4_ops);
+	const unsigned int v6_count = ARRAY_SIZE(nf_nat_ipv6_ops);
+
+	nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, v4_count);
+	nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, v6_count);
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
+#endif /* NFT INET NAT */
return 0;
}
+EXPORT_SYMBOL_GPL(nf_queue);
static unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
if (IS_ERR(type))
return PTR_ERR(type);
}
- if (!(type->hook_mask & (1 << hook->num)))
+ if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
if (type->type == NFT_CHAIN_T_NAT &&
static __be64 nf_jiffies64_to_msecs(u64 input)
{
- u64 ms = jiffies64_to_nsecs(input);
-
- return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC));
+ return cpu_to_be64(jiffies64_to_msecs(input));
}
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
return err;
}
-static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
- struct nft_set_desc *desc,
+static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
const struct nlattr *nla)
{
struct nlattr *da[NFTA_SET_DESC_MAX + 1];
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
if (nla[NFTA_SET_DESC] != NULL) {
- err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+ err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
}
}
EXPORT_SYMBOL_GPL(nf_tables_bind_set);
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_binding *binding, bool event)
+static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding, bool event)
{
list_del_rcu(&binding->list);
GFP_KERNEL);
}
}
-EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
if (err < 0)
goto err5;
+ nft_chain_route_init();
return err;
err5:
rhltable_destroy(&nft_objname_ht);
nfnetlink_subsys_unregister(&nf_tables_subsys);
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
+ nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
goto nla_put_failure;
}
- if (skb->tstamp) {
+ if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
struct nfulnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
}
EXPORT_SYMBOL_GPL(nf_osf_match);
-const char *nf_osf_find(const struct sk_buff *skb,
- const struct list_head *nf_osf_fingers,
- const int ttl_check)
+bool nf_osf_find(const struct sk_buff *skb,
+ const struct list_head *nf_osf_fingers,
+ const int ttl_check, struct nf_osf_data *data)
{
const struct iphdr *ip = ip_hdr(skb);
const struct nf_osf_user_finger *f;
const struct nf_osf_finger *kf;
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
- const char *genre = NULL;
memset(&ctx, 0, sizeof(ctx));
tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
if (!tcp)
- return NULL;
+ return false;
list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
f = &kf->finger;
if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
continue;
- genre = f->genre;
+ data->genre = f->genre;
+ data->version = f->version;
break;
}
- return genre;
+ return true;
}
EXPORT_SYMBOL_GPL(nf_osf_find);
if (nfqnl_put_bridge(entry, skb) < 0)
goto nla_put_failure;
- if (entskb->tstamp) {
+ if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
struct nfqnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
};
#endif
+#ifdef CONFIG_NF_TABLES_INET
+/* .ops_register callback of the inet nat chain type. */
+static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops)
+{
+	int err;
+
+	err = nf_nat_inet_register_fn(net, ops);
+	return err;
+}
+
+/* .ops_unregister callback of the inet nat chain type. */
+static void nft_nat_inet_unreg(struct net *net,
+			       const struct nf_hook_ops *ops)
+{
+	nf_nat_inet_unregister_fn(net, ops);
+}
+
+/* "nat" chain type for the inet family: the same handler serves all four
+ * NAT hook points, and registration goes through the inet wrappers.
+ */
+static const struct nft_chain_type nft_chain_nat_inet = {
+	.family		= NFPROTO_INET,
+	.type		= NFT_CHAIN_T_NAT,
+	.name		= "nat",
+	.ops_register	= nft_nat_inet_reg,
+	.ops_unregister	= nft_nat_inet_unreg,
+	.hook_mask	= (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_PRE_ROUTING),
+	.hooks		= {
+		[NF_INET_POST_ROUTING]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_OUT]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_IN]	= nft_nat_do_chain,
+		[NF_INET_PRE_ROUTING]	= nft_nat_do_chain,
+	},
+};
+#endif
+
static int __init nft_chain_nat_init(void)
{
#ifdef CONFIG_NF_TABLES_IPV6
#ifdef CONFIG_NF_TABLES_IPV4
nft_register_chain_type(&nft_chain_nat_ipv4);
#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_register_chain_type(&nft_chain_nat_inet);
+#endif
return 0;
}
#ifdef CONFIG_NF_TABLES_IPV6
nft_unregister_chain_type(&nft_chain_nat_ipv6);
#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_unregister_chain_type(&nft_chain_nat_inet);
+#endif
}
module_init(nft_chain_nat_init);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#ifdef CONFIG_NF_TABLES_IPV4
+/* IPv4 "route" chain hook.
+ *
+ * Runs the chain and, when the verdict is NF_ACCEPT and the source address,
+ * destination address, mark or TOS differ from their values before the
+ * chain ran, calls ip_route_me_harder().  A failure there turns the verdict
+ * into NF_DROP_ERR(err).
+ */
+static unsigned int nf_route_table_hook4(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	__be32 old_saddr, old_daddr;
+	const struct iphdr *iph;
+	struct nft_pktinfo pkt;
+	unsigned int verdict;
+	u32 old_mark;
+	u8 old_tos;
+	int err;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv4(&pkt, skb);
+
+	/* snapshot the fields compared after the chain has run */
+	iph = ip_hdr(skb);
+	old_saddr = iph->saddr;
+	old_daddr = iph->daddr;
+	old_tos = iph->tos;
+	old_mark = skb->mark;
+
+	verdict = nft_do_chain(&pkt, priv);
+	if (verdict != NF_ACCEPT)
+		return verdict;
+
+	/* fetch the header pointer again before comparing */
+	iph = ip_hdr(skb);
+	if (iph->saddr == old_saddr &&
+	    iph->daddr == old_daddr &&
+	    skb->mark == old_mark &&
+	    iph->tos == old_tos)
+		return verdict;
+
+	err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+	if (err < 0)
+		verdict = NF_DROP_ERR(err);
+
+	return verdict;
+}
+
+/* "route" chain type for IPv4; only the LOCAL_OUT hook is offered. */
+static const struct nft_chain_type nft_chain_route_ipv4 = {
+	.family		= NFPROTO_IPV4,
+	.type		= NFT_CHAIN_T_ROUTE,
+	.name		= "route",
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_hook4,
+	},
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_IPV6
+/* IPv6 "route" chain hook.
+ *
+ * Runs the chain and, when the verdict is NF_ACCEPT and the addresses,
+ * mark, hop limit or first header word differ from their values before the
+ * chain ran, calls nf_ip6_route_me_harder().  A failure there turns the
+ * verdict into NF_DROP_ERR(err).
+ */
+static unsigned int nf_route_table_hook6(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	struct in6_addr old_saddr, old_daddr;
+	u32 old_mark, old_flowlabel;
+	struct nft_pktinfo pkt;
+	unsigned int verdict;
+	u8 old_hop_limit;
+	int err;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv6(&pkt, skb);
+
+	/* snapshot source/dest address, mark and hop limit */
+	memcpy(&old_saddr, &ipv6_hdr(skb)->saddr, sizeof(old_saddr));
+	memcpy(&old_daddr, &ipv6_hdr(skb)->daddr, sizeof(old_daddr));
+	old_mark = skb->mark;
+	old_hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* first 32-bit word: flowlabel and prio, plus the version, which
+	 * shouldn't change either
+	 */
+	old_flowlabel = *((u32 *)ipv6_hdr(skb));
+
+	verdict = nft_do_chain(&pkt, priv);
+	if (verdict != NF_ACCEPT)
+		return verdict;
+
+	/* nothing compared here changed: keep the verdict as is */
+	if (!memcmp(&ipv6_hdr(skb)->saddr, &old_saddr, sizeof(old_saddr)) &&
+	    !memcmp(&ipv6_hdr(skb)->daddr, &old_daddr, sizeof(old_daddr)) &&
+	    skb->mark == old_mark &&
+	    ipv6_hdr(skb)->hop_limit == old_hop_limit &&
+	    old_flowlabel == *((u32 *)ipv6_hdr(skb)))
+		return verdict;
+
+	err = nf_ip6_route_me_harder(state->net, skb);
+	if (err < 0)
+		verdict = NF_DROP_ERR(err);
+
+	return verdict;
+}
+
+/* "route" chain type for IPv6; only the LOCAL_OUT hook is offered. */
+static const struct nft_chain_type nft_chain_route_ipv6 = {
+	.family		= NFPROTO_IPV6,
+	.type		= NFT_CHAIN_T_ROUTE,
+	.name		= "route",
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_hook6,
+	},
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+/* inet "route" chain hook: hand IPv4 and IPv6 packets to the per-family
+ * hook; for any other family just run the chain with generic pktinfo.
+ */
+static unsigned int nf_route_table_inet(void *priv,
+					struct sk_buff *skb,
+					const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	switch (state->pf) {
+	case NFPROTO_IPV4:
+		return nf_route_table_hook4(priv, skb, state);
+	case NFPROTO_IPV6:
+		return nf_route_table_hook6(priv, skb, state);
+	default:
+		/* no family-specific setup and no rerouting on this path */
+		nft_set_pktinfo(&pkt, skb, state);
+		return nft_do_chain(&pkt, priv);
+	}
+}
+
+/* "route" chain type for the inet family; only LOCAL_OUT is offered. */
+static const struct nft_chain_type nft_chain_route_inet = {
+	.family		= NFPROTO_INET,
+	.type		= NFT_CHAIN_T_ROUTE,
+	.name		= "route",
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_inet,
+	},
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+};
+#endif
+
+/* Register the "route" chain type of every family that is configured in. */
+void __init nft_chain_route_init(void)
+{
+#if defined(CONFIG_NF_TABLES_IPV6)
+	nft_register_chain_type(&nft_chain_route_ipv6);
+#endif
+
+#if defined(CONFIG_NF_TABLES_IPV4)
+	nft_register_chain_type(&nft_chain_route_ipv4);
+#endif
+
+#if defined(CONFIG_NF_TABLES_INET)
+	nft_register_chain_type(&nft_chain_route_inet);
+#endif
+}
+
+/* Unregister the "route" chain types, in the same order they were registered. */
+void __exit nft_chain_route_fini(void)
+{
+#if defined(CONFIG_NF_TABLES_IPV6)
+	nft_unregister_chain_type(&nft_chain_route_ipv6);
+#endif
+
+#if defined(CONFIG_NF_TABLES_IPV4)
+	nft_unregister_chain_type(&nft_chain_route_ipv4);
+#endif
+
+#if defined(CONFIG_NF_TABLES_INET)
+	nft_unregister_chain_type(&nft_chain_route_inet);
+#endif
+}
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+#include <net/netfilter/nf_nat_masquerade.h>
struct nft_masq {
u32 flags;
static int __init nft_masq_module_init_ipv6(void)
{
- int ret = nft_register_expr(&nft_masq_ipv6_type);
-
- if (ret)
- return ret;
-
- ret = nf_nat_masquerade_ipv6_register_notifier();
- if (ret < 0)
- nft_unregister_expr(&nft_masq_ipv6_type);
-
- return ret;
+ return nft_register_expr(&nft_masq_ipv6_type);
}
static void nft_masq_module_exit_ipv6(void)
{
nft_unregister_expr(&nft_masq_ipv6_type);
- nf_nat_masquerade_ipv6_unregister_notifier();
}
#else
static inline int nft_masq_module_init_ipv6(void) { return 0; }
static inline void nft_masq_module_exit_ipv6(void) {}
#endif
+#ifdef CONFIG_NF_TABLES_INET
+/* Evaluate "masq" from an inet table by dispatching on the packet's
+ * protocol family. A family other than IPv4 or IPv6 is not expected here
+ * and raises a one-time warning.
+ */
+static void nft_masq_inet_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		nft_masq_ipv4_eval(expr, regs, pkt);
+		break;
+	case NFPROTO_IPV6:
+		nft_masq_ipv6_eval(expr, regs, pkt);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+
+static void
+nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{ /* Destroy callback of the inet "masq" expression; expr itself is not used. */
+ nf_ct_netns_put(ctx->net, NFPROTO_INET); /* presumably balances a netns get taken at init time — TODO confirm */
+}
+
+/* Expression ops for "masq" in inet tables. eval and destroy are the inet
+ * variants; init, dump and validate are nft_masq_init/dump/validate.
+ * The type is declared first because the ops point back at it.
+ */
+static struct nft_expr_type nft_masq_inet_type;
+static const struct nft_expr_ops nft_masq_inet_ops = {
+	.type		= &nft_masq_inet_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.init		= nft_masq_init,
+	.validate	= nft_masq_validate,
+	.eval		= nft_masq_inet_eval,
+	.dump		= nft_masq_dump,
+	.destroy	= nft_masq_inet_destroy,
+};
+
+/* "masq" expression type registered for the inet family. */
+static struct nft_expr_type nft_masq_inet_type __read_mostly = {
+	.name		= "masq",
+	.family		= NFPROTO_INET,
+	.owner		= THIS_MODULE,
+	.ops		= &nft_masq_inet_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+};
+
+static int __init nft_masq_module_init_inet(void)
+{ /* Register the inet "masq" expression type; returns nft_register_expr()'s result. */
+ return nft_register_expr(&nft_masq_inet_type);
+}
+
+static void nft_masq_module_exit_inet(void)
+{ /* Called from nft_masq_module_exit() and from the error paths of nft_masq_module_init(). */
+ nft_unregister_expr(&nft_masq_inet_type);
+}
+#else
+static inline int nft_masq_module_init_inet(void) { return 0; } /* CONFIG_NF_TABLES_INET off: nothing to register */
+static inline void nft_masq_module_exit_inet(void) {} /* CONFIG_NF_TABLES_INET off: nothing to unregister */
+#endif
+
static int __init nft_masq_module_init(void)
{
int ret;
if (ret < 0)
return ret;
+ ret = nft_masq_module_init_inet();
+ if (ret < 0) {
+ nft_masq_module_exit_ipv6();
+ return ret;
+ }
+
ret = nft_register_expr(&nft_masq_ipv4_type);
if (ret < 0) {
+ nft_masq_module_exit_inet();
nft_masq_module_exit_ipv6();
return ret;
}
- ret = nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_inet_register_notifiers();
if (ret < 0) {
nft_masq_module_exit_ipv6();
+ nft_masq_module_exit_inet();
nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
static void __exit nft_masq_module_exit(void)
{
nft_masq_module_exit_ipv6();
+ nft_masq_module_exit_inet();
nft_unregister_expr(&nft_masq_ipv4_type);
- nf_nat_masquerade_ipv4_unregister_notifier();
+ nf_nat_masquerade_inet_unregister_notifiers();
}
module_init(nft_masq_module_init);
return -EINVAL;
family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (family != ctx->family)
+ if (ctx->family != NFPROTO_INET && ctx->family != family)
return -EOPNOTSUPP;
switch (family) {
.owner = THIS_MODULE,
};
+#ifdef CONFIG_NF_TABLES_INET
+/* Evaluate "nat" from an inet table. The expression carries the family it
+ * was configured for; packets of any other family are left untouched.
+ */
+static void nft_nat_inet_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_nat *nat = nft_expr_priv(expr);
+
+	if (nat->family != nft_pf(pkt))
+		return;
+
+	nft_nat_eval(expr, regs, pkt);
+}
+
+/* Expression ops for "nat" in inet tables; only eval differs from the
+ * nft_nat_* callbacks used here for init, destroy, dump and validate.
+ *
+ * NOTE(review): .type points at nft_nat_type, while these ops are attached
+ * to nft_inet_nat_type — confirm this is intended.
+ */
+static const struct nft_expr_ops nft_nat_inet_ops = {
+	.type		= &nft_nat_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+	.init		= nft_nat_init,
+	.validate	= nft_nat_validate,
+	.eval		= nft_nat_inet_eval,
+	.dump		= nft_nat_dump,
+	.destroy	= nft_nat_destroy,
+};
+
+/* "nat" expression type registered for the inet family. */
+static struct nft_expr_type nft_inet_nat_type __read_mostly = {
+	.family		= NFPROTO_INET,
+	.name		= "nat",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_nat_inet_ops,
+	.policy		= nft_nat_policy,
+	.maxattr	= NFTA_NAT_MAX,
+};
+
+static int nft_nat_inet_module_init(void)
+{ /* Register the inet "nat" expression type; returns nft_register_expr()'s result. */
+ return nft_register_expr(&nft_inet_nat_type);
+}
+
+static void nft_nat_inet_module_exit(void)
+{ /* Also called by nft_nat_module_init() to undo the registration when a later step fails. */
+ nft_unregister_expr(&nft_inet_nat_type);
+}
+#else
+static int nft_nat_inet_module_init(void) { return 0; } /* CONFIG_NF_TABLES_INET off: nothing to register */
+static void nft_nat_inet_module_exit(void) { } /* CONFIG_NF_TABLES_INET off: nothing to unregister */
+#endif
+
static int __init nft_nat_module_init(void)
{
- return nft_register_expr(&nft_nat_type);
+ int ret = nft_nat_inet_module_init();
+
+ if (ret)
+ return ret;
+
+ ret = nft_register_expr(&nft_nat_type);
+ if (ret)
+ nft_nat_inet_module_exit();
+
+ return ret;
}
static void __exit nft_nat_module_exit(void)
{
+ nft_nat_inet_module_exit();
nft_unregister_expr(&nft_nat_type);
}
struct nft_osf {
enum nft_registers dreg:8;
u8 ttl;
+ u32 flags;
};
static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
[NFTA_OSF_DREG] = { .type = NLA_U32 },
[NFTA_OSF_TTL] = { .type = NLA_U8 },
+ [NFTA_OSF_FLAGS] = { .type = NLA_U32 },
};
static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct nft_osf *priv = nft_expr_priv(expr);
u32 *dest = ®s->data[priv->dreg];
struct sk_buff *skb = pkt->skb;
+ char os_match[NFT_OSF_MAXGENRELEN + 1];
const struct tcphdr *tcp;
+ struct nf_osf_data data;
struct tcphdr _tcph;
- const char *os_name;
tcp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(struct tcphdr), &_tcph);
return;
}
- os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl);
- if (!os_name)
+ if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
- else
- strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN);
+ } else {
+ if (priv->flags & NFT_OSF_F_VERSION)
+ snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
+ data.genre, data.version);
+ else
+ strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
+
+ strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+ }
}
static int nft_osf_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_osf *priv = nft_expr_priv(expr);
+ u32 flags;
int err;
u8 ttl;
priv->ttl = ttl;
}
+ if (tb[NFTA_OSF_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS]));
+ if (flags != NFT_OSF_F_VERSION)
+ return -EINVAL;
+ priv->flags = flags;
+ }
+
priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
err = nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN);
if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
goto nla_put_failure;
+	/* priv->flags is stored in host byte order (it is set from
+	 * ntohl(nla_get_be32())), so it must go through htonl() to produce
+	 * the big-endian value nla_put_be32() expects.
+	 */
+	if (nla_put_be32(skb, NFTA_OSF_FLAGS, htonl(priv->flags)))
+		goto nla_put_failure;
+
if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
goto nla_put_failure;
return nf_ct_netns_get(ctx->net, ctx->family);
}
-int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_redir *priv = nft_expr_priv(expr);
};
#endif
+#ifdef CONFIG_NF_TABLES_INET
+/* Evaluate "redir" from an inet table by dispatching on the packet's
+ * protocol family. A family other than IPv4 or IPv6 is not expected here
+ * and raises a one-time warning.
+ */
+static void nft_redir_inet_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		nft_redir_ipv4_eval(expr, regs, pkt);
+		break;
+	case NFPROTO_IPV6:
+		nft_redir_ipv6_eval(expr, regs, pkt);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+
+static void
+nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{ /* Destroy callback of the inet "redir" expression; expr itself is not used. */
+ nf_ct_netns_put(ctx->net, NFPROTO_INET); /* presumably balances the nf_ct_netns_get() done at init — TODO confirm */
+}
+
+/* Expression ops for "redir" in inet tables. eval and destroy are the inet
+ * variants; init, dump and validate are nft_redir_init/dump/validate.
+ * The type is declared first because the ops point back at it.
+ */
+static struct nft_expr_type nft_redir_inet_type;
+static const struct nft_expr_ops nft_redir_inet_ops = {
+	.type		= &nft_redir_inet_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+	.init		= nft_redir_init,
+	.validate	= nft_redir_validate,
+	.eval		= nft_redir_inet_eval,
+	.dump		= nft_redir_dump,
+	.destroy	= nft_redir_inet_destroy,
+};
+
+/* "redir" expression type registered for the inet family. */
+static struct nft_expr_type nft_redir_inet_type __read_mostly = {
+	.family		= NFPROTO_INET,
+	.name		= "redir",
+	.ops		= &nft_redir_inet_ops,
+	.policy		= nft_redir_policy,
+	/* The attribute bound has to come from the redir attribute set that
+	 * nft_redir_policy describes, not from the masq one.
+	 */
+	.maxattr	= NFTA_REDIR_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_redir_module_init_inet(void)
+{ /* Register the inet "redir" expression type; returns nft_register_expr()'s result. */
+ return nft_register_expr(&nft_redir_inet_type);
+}
+#else
+static inline int nft_redir_module_init_inet(void) { return 0; } /* CONFIG_NF_TABLES_INET off: nothing to register */
+#endif
+
static int __init nft_redir_module_init(void)
{
int ret = nft_register_expr(&nft_redir_ipv4_type);
}
#endif
+ ret = nft_redir_module_init_inet();
+ if (ret < 0) {
+ nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+ return ret;
+ }
+
return ret;
}
#ifdef CONFIG_NF_TABLES_IPV6
nft_unregister_expr(&nft_redir_ipv6_type);
#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_unregister_expr(&nft_redir_inet_type);
+#endif
}
module_init(nft_redir_module_init);
EXPORT_SYMBOL_GPL(xt_request_find_match);
/* Find target, grabs ref. Returns ERR_PTR() on error. */
-struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
+static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *t;
int err = -ENOENT;
return ERR_PTR(err);
}
-EXPORT_SYMBOL(xt_find_target);
struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
{
* published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/types.h>
-#include <linux/inetdevice.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+#include <net/netfilter/nf_nat_masquerade.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
nf_ct_netns_put(par->net, par->family);
}
-static struct xt_target masquerade_tg_reg __read_mostly = {
- .name = "MASQUERADE",
- .family = NFPROTO_IPV4,
- .target = masquerade_tg,
- .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
- .table = "nat",
- .hooks = 1 << NF_INET_POST_ROUTING,
- .checkentry = masquerade_tg_check,
- .destroy = masquerade_tg_destroy,
- .me = THIS_MODULE,
+#if IS_ENABLED(CONFIG_IPV6)
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{ /* IPv6 MASQUERADE target: delegates to nf_nat_masquerade_ipv6() with the rule's targinfo and xt_out(par). */
+ return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
+}
+
+/* Validate an IPv6 MASQUERADE rule: a range with NF_NAT_RANGE_MAP_IPS set
+ * is rejected with -EINVAL; otherwise the result of nf_ct_netns_get() for
+ * the rule's netns and family is returned.
+ */
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *r = par->targinfo;
+
+	return (r->flags & NF_NAT_RANGE_MAP_IPS) ?
+		-EINVAL : nf_ct_netns_get(par->net, par->family);
+}
+#endif
+
+static struct xt_target masquerade_tg_reg[] __read_mostly = { /* MASQUERADE targets registered by this module */
+ { /* opened outside the #if so the IPv4 entry keeps its brace when IPv6 is disabled */
+#if IS_ENABLED(CONFIG_IPV6)
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV6,
+ .target = masquerade_tg6,
+ .targetsize = sizeof(struct nf_nat_range), /* NOTE(review): masquerade_tg6_checkentry() reads targinfo as struct nf_nat_range2 — confirm this size is intended */
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg6_checkentry,
+ .destroy = masquerade_tg_destroy, /* same destroy callback as the IPv4 entry */
+ .me = THIS_MODULE,
+ }, { /* closes the IPv6 entry and opens the IPv4 one */
+#endif
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV4,
+ .target = masquerade_tg,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg_check,
+ .destroy = masquerade_tg_destroy,
+ .me = THIS_MODULE,
+ }
};
static int __init masquerade_tg_init(void)
{
int ret;
- ret = xt_register_target(&masquerade_tg_reg);
+ ret = xt_register_targets(masquerade_tg_reg,
+ ARRAY_SIZE(masquerade_tg_reg));
if (ret)
return ret;
- ret = nf_nat_masquerade_ipv4_register_notifier();
- if (ret)
- xt_unregister_target(&masquerade_tg_reg);
+ ret = nf_nat_masquerade_inet_register_notifiers();
+ if (ret) {
+ xt_unregister_targets(masquerade_tg_reg,
+ ARRAY_SIZE(masquerade_tg_reg));
+ return ret;
+ }
return ret;
}
static void __exit masquerade_tg_exit(void)
{
- xt_unregister_target(&masquerade_tg_reg);
- nf_nat_masquerade_ipv4_unregister_notifier();
+ xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg));
+ nf_nat_masquerade_inet_unregister_notifiers();
}
module_init(masquerade_tg_init);
module_exit(masquerade_tg_exit);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS("ip6t_MASQUERADE");
+#endif
+MODULE_ALIAS("ipt_MASQUERADE");
s64 stamp;
/*
- * We cannot use get_seconds() instead of __net_timestamp() here.
+ * We need real time here, but we can neither use skb->tstamp
+ * nor __net_timestamp().
+ *
+ * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
+ * use different clock types (real vs monotonic).
+ *
* Suppose you have two rules:
- * 1. match before 13:00
- * 2. match after 13:00
+ * 1. match before 13:00
+ * 2. match after 13:00
+ *
* If you match against processing time (get_seconds) it
* may happen that the same packet matches both rules if
- * it arrived at the right moment before 13:00.
+ * it arrived at the right moment before 13:00, so it would be
+ * better to check skb->tstamp and set it via __net_timestamp()
+ * if needed. This however breaks outgoing packets tx timestamp,
+ * and causes them to get delayed forever by fq packet scheduler.
*/
- if (skb->tstamp == 0)
- __net_timestamp((struct sk_buff *)skb);
-
- stamp = ktime_to_ns(skb->tstamp);
- stamp = div_s64(stamp, NSEC_PER_SEC);
+ stamp = get_seconds();
if (info->flags & XT_TIME_LOCAL_TZ)
/* Adjust for local timezone */
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err = 0;
- unsigned long groups = nladdr->nl_groups;
+ unsigned long groups;
bool bound;
if (addr_len < sizeof(struct sockaddr_nl))
if (nladdr->nl_family != AF_NETLINK)
return -EINVAL;
+ groups = nladdr->nl_groups;
/* Only superuser is allowed to listen multicasts */
if (groups) {
{
struct sock *sk = sock->sk;
void __user *argp = (void __user *)arg;
- int ret;
switch (cmd) {
case TIOCOUTQ: {
return put_user(amount, (int __user *)argp);
}
- case SIOCGSTAMP:
- lock_sock(sk);
- ret = sock_get_timestamp(sk, argp);
- release_sock(sk);
- return ret;
-
- case SIOCGSTAMPNS:
- lock_sock(sk);
- ret = sock_get_timestampns(sk, argp);
- release_sock(sk);
- return ret;
-
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
.getname = nr_getname,
.poll = datagram_poll,
.ioctl = nr_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = nr_listen,
.shutdown = sock_no_shutdown,
.setsockopt = nr_setsockopt,
int i;
int rc = proto_register(&nr_proto, 0);
- if (rc != 0)
- goto out;
+ if (rc)
+ return rc;
if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n");
- return -1;
+ pr_err("NET/ROM: %s - nr_ndevs parameter too large\n",
+ __func__);
+ rc = -EINVAL;
+ goto unregister_proto;
}
dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
- if (dev_nr == NULL) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
- return -1;
+ if (!dev_nr) {
+ pr_err("NET/ROM: %s - unable to allocate device array\n",
+ __func__);
+ rc = -ENOMEM;
+ goto unregister_proto;
}
for (i = 0; i < nr_ndevs; i++) {
sprintf(name, "nr%d", i);
dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
if (!dev) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
+ rc = -ENOMEM;
goto fail;
}
dev->base_addr = i;
- if (register_netdev(dev)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n");
+ rc = register_netdev(dev);
+ if (rc) {
free_netdev(dev);
goto fail;
}
dev_nr[i] = dev;
}
- if (sock_register(&nr_family_ops)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n");
+ rc = sock_register(&nr_family_ops);
+ if (rc)
goto fail;
- }
- register_netdevice_notifier(&nr_dev_notifier);
+ rc = register_netdevice_notifier(&nr_dev_notifier);
+ if (rc)
+ goto out_sock;
ax25_register_pid(&nr_pid);
ax25_linkfail_register(&nr_linkfail_notifier);
#ifdef CONFIG_SYSCTL
- nr_register_sysctl();
+ rc = nr_register_sysctl();
+ if (rc)
+ goto out_sysctl;
#endif
nr_loopback_init();
- proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
- proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
- proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
-out:
- return rc;
+ rc = -ENOMEM;
+ if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops))
+ goto proc_remove1;
+ if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net,
+ &nr_neigh_seqops))
+ goto proc_remove2;
+ if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net,
+ &nr_node_seqops))
+ goto proc_remove3;
+
+ return 0;
+
+proc_remove3:
+ remove_proc_entry("nr_neigh", init_net.proc_net);
+proc_remove2:
+ remove_proc_entry("nr", init_net.proc_net);
+proc_remove1:
+
+ nr_loopback_clear();
+ nr_rt_free();
+
+#ifdef CONFIG_SYSCTL
+ nr_unregister_sysctl();
+out_sysctl:
+#endif
+ ax25_linkfail_release(&nr_linkfail_notifier);
+ ax25_protocol_release(AX25_P_NETROM);
+ unregister_netdevice_notifier(&nr_dev_notifier);
+out_sock:
+ sock_unregister(PF_NETROM);
fail:
while (--i >= 0) {
unregister_netdev(dev_nr[i]);
free_netdev(dev_nr[i]);
}
kfree(dev_nr);
+unregister_proto:
proto_unregister(&nr_proto);
- rc = -1;
- goto out;
+ return rc;
}
module_init(nr_proto_init);
}
}
-void __exit nr_loopback_clear(void)
+void nr_loopback_clear(void)
{
del_timer_sync(&loopback_timer);
skb_queue_purge(&loopback_queue);
/*
* Free all memory associated with the nodes and routes lists.
*/
-void __exit nr_rt_free(void)
+void nr_rt_free(void)
{
struct nr_neigh *s = NULL;
struct nr_node *t = NULL;
{ }
};
-void __init nr_register_sysctl(void)
+int __init nr_register_sysctl(void)
{
nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table);
+ if (!nr_table_header)
+ return -ENOMEM;
+ return 0;
}
void nr_unregister_sysctl(void)
create_info = (struct nci_hci_create_pipe_resp *)skb->data;
dest_gate = create_info->dest_gate;
new_pipe = create_info->pipe;
+ if (new_pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
/* Save the new created pipe and bind with local gate,
* the description for skb->data[3] is destination gate id
goto exit;
}
delete_info = (struct nci_hci_delete_pipe_noti *)skb->data;
+ if (delete_info->pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
ndev->hci_dev->pipes[delete_info->pipe].gate =
NCI_HCI_INVALID_GATE;
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/ipv6_frag.h>
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_nat.h>
#endif
struct md_mark mark;
struct md_labels labels;
char timeout[CTNL_TIMEOUT_NAME_MAX];
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */
#endif
};
return ct_executed;
}
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.
* Returns either NF_ACCEPT or NF_DROP.
return err;
}
-#else /* !CONFIG_NF_NAT_NEEDED */
+#else /* !CONFIG_NF_NAT */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
static int parse_nat(const struct nlattr *attr,
struct ovs_conntrack_info *info, bool log)
{
.maxlen = sizeof(struct md_labels) },
[OVS_CT_ATTR_HELPER] = { .minlen = 1,
.maxlen = NF_CT_HELPER_NAME_LEN },
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
/* NAT length is checked when parsing the nested attributes. */
[OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
#endif
return -EINVAL;
}
break;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
case OVS_CT_ATTR_NAT: {
int err = parse_nat(a, info, log);
return err;
}
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
return -EMSGSIZE;
}
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
return -EMSGSIZE;
#endif
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
#ifdef CONFIG_INET
case SIOCADDRT:
case SIOCDELRT:
.getname = packet_getname_spkt,
.poll = datagram_poll,
.ioctl = packet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
.getname = packet_getname,
.poll = packet_poll,
.ioctl = packet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = packet_setsockopt,
break;
}
break;
- case SIOCGSTAMP:
- rc = sock_get_timestamp(sk, argp);
- break;
case SIOCADDRT:
case SIOCDELRT:
case SIOCSIFADDR:
.recvmsg = qrtr_recvmsg,
.getname = qrtr_getname,
.ioctl = qrtr_ioctl,
+ .gettstamp = sock_gettstamp,
.poll = datagram_poll,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
+
lock_sock(sk);
switch (uaddr->sa_family) {
/* We allow an RDS socket to be bound to either IPv4 or IPv6
* address.
*/
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
if (uaddr->sa_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
else
pool = rds_ibdev->mr_1m_pool;
+ if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
+ queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+ /* Switch pools if one of the pool is reaching upper limit */
+ if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ pool = rds_ibdev->mr_1m_pool;
+ else
+ pool = rds_ibdev->mr_8k_pool;
+ }
+
ibmr = rds_ib_try_reuse_ibmr(pool);
if (ibmr)
return ibmr;
struct rds_ib_mr *ibmr = NULL;
int iter = 0;
- if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
- queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
while (1) {
ibmr = rds_ib_reuse_mr(pool);
if (ibmr)
return put_user(amount, (unsigned int __user *) argp);
}
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *) argp);
-
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *) argp);
-
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
.getname = rose_getname,
.poll = datagram_poll,
.ioctl = rose_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = rose_listen,
.shutdown = sock_no_shutdown,
.setsockopt = rose_setsockopt,
#include <linux/init.h>
static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
static struct timer_list loopback_timer;
static void rose_set_loopback_timer(void);
int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
{
- struct sk_buff *skbn;
+ struct sk_buff *skbn = NULL;
- skbn = skb_clone(skb, GFP_ATOMIC);
+ if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
+ skbn = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(skb);
-
- if (skbn != NULL) {
+ if (skbn) {
+ consume_skb(skb);
skb_queue_tail(&loopback_queue, skbn);
if (!rose_loopback_running())
rose_set_loopback_timer();
+ } else {
+ kfree_skb(skb);
}
return 1;
}
-
static void rose_set_loopback_timer(void)
{
- del_timer(&loopback_timer);
-
- loopback_timer.expires = jiffies + 10;
- add_timer(&loopback_timer);
+ mod_timer(&loopback_timer, jiffies + 10);
}
static void rose_loopback_timer(struct timer_list *unused)
struct sock *sk;
unsigned short frametype;
unsigned int lci_i, lci_o;
+ int count;
- while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+ for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
+ skb = skb_dequeue(&loopback_queue);
+ if (!skb)
+ return;
if (skb->len < ROSE_MIN_LEN) {
kfree_skb(skb);
continue;
kfree_skb(skb);
}
}
+ if (!skb_queue_empty(&loopback_queue))
+ mod_timer(&loopback_timer, jiffies + 1);
}
void __exit rose_loopback_clear(void)
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
struct rxrpc_local *local;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- u16 service_id = srx->srx_service;
+ u16 service_id;
int ret;
_enter("%p,%p,%d", rx, saddr, len);
ret = rxrpc_validate_address(rx, srx, len);
if (ret < 0)
goto error;
+ service_id = srx->srx_service;
lock_sock(&rx->sk);
* rxrpc_kernel_check_life - Check to see whether a call is still alive
* @sock: The socket the call is on
* @call: The call to check
+ * @_life: Where to store the life value
*
* Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server. Returns a number representing the life state
- * which can be compared to that returned by a previous call.
+ * getting ACKs from the server. Passes back in *_life a number representing
+ * the life state which can be compared to that returned by a previous call and
+ * return true if the call is still alive.
*
* If the life state stalls, rxrpc_kernel_probe_life() should be called and
* then 2RTT waited.
*/
-u32 rxrpc_kernel_check_life(const struct socket *sock,
- const struct rxrpc_call *call)
+bool rxrpc_kernel_check_life(const struct socket *sock,
+ const struct rxrpc_call *call,
+ u32 *_life)
{
- return call->acks_latest;
+ *_life = call->acks_latest;
+ return call->state != RXRPC_CALL_COMPLETE;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
u8 ackr_reason; /* reason to ACK */
u16 ackr_skew; /* skew on packet being ACK'd */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
+ rxrpc_serial_t ackr_first_seq; /* first sequence number received */
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
* pass a connection-level abort onto all calls on that connection
*/
static void rxrpc_abort_calls(struct rxrpc_connection *conn,
- enum rxrpc_call_completion compl)
+ enum rxrpc_call_completion compl,
+ rxrpc_serial_t serial)
{
struct rxrpc_call *call;
int i;
call->call_id, 0,
conn->abort_code,
conn->error);
+ else
+ trace_rxrpc_rx_abort(call, serial,
+ conn->abort_code);
if (rxrpc_set_call_completion(call, compl,
conn->abort_code,
conn->error))
conn->state = RXRPC_CONN_LOCALLY_ABORTED;
spin_unlock_bh(&conn->state_lock);
- rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED);
-
msg.msg_name = &conn->params.peer->srx.transport;
msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
len = iov[0].iov_len + iov[1].iov_len;
serial = atomic_inc_return(&conn->serial);
+ rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
whdr.serial = htonl(serial);
_proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code);
conn->error = -ECONNABORTED;
conn->abort_code = abort_code;
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED);
+ rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
u8 acks[RXRPC_MAXACKS];
} buf;
rxrpc_serial_t acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;
_enter("");
acked_serial = ntohl(buf.ack.serial);
first_soft_ack = ntohl(buf.ack.firstPacket);
+ prev_pkt = ntohl(buf.ack.previousPacket);
hard_ack = first_soft_ack - 1;
nr_acks = buf.ack.nAcks;
summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
buf.ack.reason : RXRPC_ACK__INVALID);
trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial,
- first_soft_ack, ntohl(buf.ack.previousPacket),
+ first_soft_ack, prev_pkt,
summary.ack_reason, nr_acks);
if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
rxrpc_propose_ack_respond_to_ack);
}
- /* Discard any out-of-order or duplicate ACKs. */
- if (before_eq(sp->hdr.serial, call->acks_latest))
+ /* Discard any out-of-order or duplicate ACKs (outside lock). */
+ if (before(first_soft_ack, call->ackr_first_seq) ||
+ before(prev_pkt, call->ackr_prev_seq))
return;
buf.info.rxMTU = 0;
spin_lock(&call->input_lock);
- /* Discard any out-of-order or duplicate ACKs. */
- if (before_eq(sp->hdr.serial, call->acks_latest))
+ /* Discard any out-of-order or duplicate ACKs (inside lock). */
+ if (before(first_soft_ack, call->ackr_first_seq) ||
+ before(prev_pkt, call->ackr_prev_seq))
goto out;
call->acks_latest_ts = skb->tstamp;
call->acks_latest = sp->hdr.serial;
+ call->ackr_first_seq = first_soft_ack;
+ call->ackr_prev_seq = prev_pkt;
+
/* Parse rwind and mtu sizes if provided. */
if (buf.info.rxMTU)
rxrpc_input_ackinfo(call, skb, &buf.info);
* handle data received on the local endpoint
* - may be called in interrupt context
*
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
*
* Called with the RCU read lock held from the IP layer via UDP.
*/
int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
struct rxrpc_connection *conn;
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
struct rxrpc_skb_priv *sp;
- struct rxrpc_local *local = udp_sk->sk_user_data;
struct rxrpc_peer *peer = NULL;
struct rxrpc_sock *rx = NULL;
unsigned int channel;
_enter("%p", udp_sk);
+ if (unlikely(!local)) {
+ kfree_skb(skb);
+ return 0;
+ }
if (skb->tstamp == 0)
skb->tstamp = ktime_get_real();
ret = -ENOMEM;
sock_error:
mutex_unlock(&rxnet->local_mutex);
- kfree(local);
+ if (local)
+ call_rcu(&local->rcu, rxrpc_local_rcu);
_leave(" = %d", ret);
return ERR_PTR(ret);
_enter("%p{%d}", sk, local->debug_id);
+ /* Clear the outstanding error value on the socket so that it doesn't
+ * cause kernel_sendmsg() to return it later.
+ */
+ sock_error(sk);
+
skb = sock_dequeue_err_skb(sk);
if (!skb) {
_leave("UDP socket errqueue empty");
}
/*
- * Queue a DATA packet for transmission, set the resend timeout and send the
- * packet immediately
+ * Queue a DATA packet for transmission, set the resend timeout and send
+ * the packet immediately. Returns the error from rxrpc_send_data_packet()
+ * in case the caller wants to do something with it.
*/
-static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
- struct sk_buff *skb, bool last,
- rxrpc_notify_end_tx_t notify_end_tx)
+static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ struct sk_buff *skb, bool last,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned long now;
out:
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
- _leave("");
+ _leave(" = %d", ret);
+ return ret;
}
/*
if (ret < 0)
goto out;
- rxrpc_queue_packet(rx, call, skb,
- !msg_data_left(msg) && !more,
- notify_end_tx);
+ ret = rxrpc_queue_packet(rx, call, skb,
+ !msg_data_left(msg) && !more,
+ notify_end_tx);
+ /* Should check for failure here */
skb = NULL;
}
} while (msg_data_left(msg) > 0);
struct rhashtable ht;
spinlock_t masks_lock; /* Protect masks list */
struct list_head masks;
+ struct list_head hw_filters;
struct rcu_work rwork;
struct idr handle_idr;
};
struct tcf_result res;
struct fl_flow_key key;
struct list_head list;
+ struct list_head hw_list;
u32 handle;
u32 flags;
u32 in_hw_count;
spin_lock_init(&head->masks_lock);
INIT_LIST_HEAD_RCU(&head->masks);
+ INIT_LIST_HEAD(&head->hw_filters);
rcu_assign_pointer(tp->root, head);
idr_init(&head->handle_idr);
fl_mask_free(mask);
}
-static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
- bool async)
+static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
{
if (!refcount_dec_and_test(&mask->refcnt))
return false;
list_del_rcu(&mask->list);
spin_unlock(&head->masks_lock);
- if (async)
- tcf_queue_work(&mask->rwork, fl_mask_free_work);
- else
- fl_mask_free(mask);
+ tcf_queue_work(&mask->rwork, fl_mask_free_work);
return true;
}
+/* Return the flower head stored in tp->root.
+ *
+ * The flower classifier only changes the root pointer during init and
+ * destroy, and users must hold a reference on the tcf_proto instance before
+ * calling its API, so reference counting protects tp->root from a
+ * concurrent fl_destroy(). A raw RCU dereference is used for that reason.
+ */
+static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
+{
+	struct cls_fl_head *head = rcu_dereference_raw(tp->root);
+
+	return head;
+}
+
static void __fl_destroy_filter(struct cls_fl_filter *f)
{
tcf_exts_destroy(&f->exts);
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
spin_lock(&tp->lock);
+ list_del_init(&f->hw_list);
tcf_block_offload_dec(block, &f->flags);
spin_unlock(&tp->lock);
struct cls_fl_filter *f, bool rtnl_held,
struct netlink_ext_ack *extack)
{
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(f->flags);
goto errout;
}
+ spin_lock(&tp->lock);
+ list_add(&f->hw_list, &head->hw_filters);
+ spin_unlock(&tp->lock);
errout:
if (!rtnl_held)
rtnl_unlock();
rtnl_unlock();
}
-static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
-{
- /* Flower classifier only changes root pointer during init and destroy.
- * Users must obtain reference to tcf_proto instance before calling its
- * API, so tp->root pointer is protected from concurrent call to
- * fl_destroy() by reference counting.
- */
- return rcu_dereference_raw(tp->root);
-}
-
static void __fl_put(struct cls_fl_filter *f)
{
if (!refcount_dec_and_test(&f->refcnt))
return;
- WARN_ON(!f->deleted);
-
if (tcf_exts_get_net(&f->exts))
tcf_queue_work(&f->rwork, fl_destroy_filter_work);
else
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
- bool async = tcf_exts_get_net(&f->exts);
*last = false;
list_del_rcu(&f->list);
spin_unlock(&tp->lock);
- *last = fl_mask_put(head, f->mask, async);
+ *last = fl_mask_put(head, f->mask);
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f, rtnl_held, extack);
tcf_unbind_filter(tp, &f->res);
struct fl_flow_mask *mask = fnew->mask;
int err;
- err = rhashtable_insert_fast(&mask->ht,
- &fnew->ht_node,
- mask->filter_ht_params);
+ err = rhashtable_lookup_insert_fast(&mask->ht,
+ &fnew->ht_node,
+ mask->filter_ht_params);
if (err) {
*in_ht = false;
/* It is okay if filter with same key exists when
err = -ENOBUFS;
goto errout_tb;
}
+ INIT_LIST_HEAD(&fnew->hw_list);
refcount_set(&fnew->refcnt, 1);
err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
goto errout_hw;
}
- refcount_inc(&fnew->refcnt);
if (fold) {
/* Fold filter was deleted concurrently. Retry lookup. */
if (fold->deleted) {
in_ht = true;
}
+ refcount_inc(&fnew->refcnt);
rhashtable_remove_fast(&fold->mask->ht,
&fold->ht_node,
fold->mask->filter_ht_params);
spin_unlock(&tp->lock);
- fl_mask_put(head, fold->mask, true);
+ fl_mask_put(head, fold->mask);
if (!tc_skip_hw(fold->flags))
fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
tcf_unbind_filter(tp, &fold->res);
- tcf_exts_get_net(&fold->exts);
/* Caller holds reference to fold, so refcnt is always > 0
* after this.
*/
if (err)
goto errout_hw;
+ refcount_inc(&fnew->refcnt);
fnew->handle = handle;
list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
spin_unlock(&tp->lock);
kfree(mask);
return 0;
+errout_ht:
+ spin_lock(&tp->lock);
errout_hw:
+ fnew->deleted = true;
spin_unlock(&tp->lock);
if (!tc_skip_hw(fnew->flags))
fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
-errout_ht:
if (in_ht)
rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
errout_mask:
- fl_mask_put(head, fnew->mask, true);
+ fl_mask_put(head, fnew->mask);
errout:
- tcf_queue_work(&fnew->rwork, fl_destroy_filter_work);
+ __fl_put(fnew);
errout_tb:
kfree(tb);
errout_mask_alloc:
}
}
+/* Return the entry of head->hw_filters that follows @f, with its refcnt
+ * incremented, or NULL when the list is empty or no further entry qualifies.
+ * A NULL @f starts the walk at the first entry. When @add is true, entries
+ * flagged as deleted are skipped. The walk runs under tp->lock.
+ */
+static struct cls_fl_filter *
+fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+
+ spin_lock(&tp->lock);
+ if (list_empty(&head->hw_filters)) {
+ spin_unlock(&tp->lock);
+ return NULL;
+ }
+
+ /* No cursor yet: use the list head as a pseudo-entry so that the
+ * _continue iterator below begins at the first real filter.
+ */
+ if (!f)
+ f = list_entry(&head->hw_filters, struct cls_fl_filter,
+ hw_list);
+ list_for_each_entry_continue(f, &head->hw_filters, hw_list) {
+ /* Only hand out filters on which a reference could be taken. */
+ if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
+ spin_unlock(&tp->lock);
+ return f;
+ }
+ }
+
+ spin_unlock(&tp->lock);
+ return NULL;
+}
+
static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- unsigned long handle = 0;
- struct cls_fl_filter *f;
+ struct cls_fl_filter *f = NULL;
int err;
- while ((f = fl_get_next_filter(tp, &handle))) {
- if (tc_skip_hw(f->flags))
- goto next_flow;
+ /* hw_filters list can only be changed by hw offload functions after
+ * obtaining rtnl lock. Make sure it is not changed while reoffload is
+ * iterating it.
+ */
+ ASSERT_RTNL();
+ while ((f = fl_get_next_hw_filter(tp, f, add))) {
cls_flower.rule =
flow_rule_alloc(tcf_exts_num_actions(&f->exts));
if (!cls_flower.rule) {
add);
spin_unlock(&tp->lock);
next_flow:
- handle++;
__fl_put(f);
}
qdisc_put(old);
}
+/* Drop TCQ_F_NOLOCK from @sch. If the qdisc was also using per-CPU
+ * statistics (TCQ_F_CPUSTATS), free both per-CPU stat blocks, reset the
+ * pointers and clear that flag as well.
+ */
+static void qdisc_clear_nolock(struct Qdisc *sch)
+{
+	sch->flags &= ~TCQ_F_NOLOCK;
+
+	if (sch->flags & TCQ_F_CPUSTATS) {
+		free_percpu(sch->cpu_bstats);
+		sch->cpu_bstats = NULL;
+
+		free_percpu(sch->cpu_qstats);
+		sch->cpu_qstats = NULL;
+
+		sch->flags &= ~TCQ_F_CPUSTATS;
+	}
+}
+
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
* to device "dev".
*
/* Only support running class lockless if parent is lockless */
if (new && (new->flags & TCQ_F_NOLOCK) &&
parent && !(parent->flags & TCQ_F_NOLOCK))
- new->flags &= ~TCQ_F_NOLOCK;
+ qdisc_clear_nolock(new);
if (!cops || !cops->graft)
return -EOPNOTSUPP;
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
+static LIST_HEAD(cbs_list);
+static DEFINE_SPINLOCK(cbs_list_lock);
+
#define BYTES_PER_KBIT (1000LL / 8)
struct cbs_sched_data {
bool offload;
int queue;
- s64 port_rate; /* in bytes/s */
+ atomic64_t port_rate; /* in bytes/s */
s64 last; /* timestamp in ns */
s64 credits; /* in bytes */
s32 locredit; /* in bytes */
struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
struct Qdisc *qdisc;
+ struct list_head cbs_list;
};
static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
s64 credits;
int len;
+ if (atomic64_read(&q->port_rate) == -1) {
+ WARN_ONCE(1, "cbs: dequeue() called with unknown port rate.");
+ return NULL;
+ }
+
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
/* As sendslope is a negative number, this will decrease the
* amount of q->credits.
*/
- credits = credits_from_len(len, q->sendslope, q->port_rate);
+ credits = credits_from_len(len, q->sendslope,
+ atomic64_read(&q->port_rate));
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
return 0;
}
+/* Refresh q->port_rate (bytes/s) from the link speed ethtool reports for
+ * @dev and publish it atomically. -1 is stored when the query fails or the
+ * speed is SPEED_UNKNOWN.
+ */
+static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
+{
+	struct ethtool_link_ksettings ecmd;
+	int speed = SPEED_UNKNOWN;
+	s64 port_rate = -1;
+
+	/* Only read ecmd when the query succeeded; it is uninitialized
+	 * otherwise (this also keeps the debug print below well-defined).
+	 */
+	if (!__ethtool_get_link_ksettings(dev, &ecmd))
+		speed = ecmd.base.speed;
+
+	/* Compute in 64 bits: speed * 1000 * BYTES_PER_KBIT no longer fits in
+	 * an int once the link is faster than roughly 17 Gbit/s.
+	 */
+	if (speed != SPEED_UNKNOWN)
+		port_rate = (s64)speed * 1000 * BYTES_PER_KBIT;
+
+	atomic64_set(&q->port_rate, port_rate);
+	netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
+		   dev->name, (long long)atomic64_read(&q->port_rate),
+		   speed);
+}
+
+/* Netdevice notifier callback. On NETDEV_UP or NETDEV_CHANGE, look up the
+ * CBS instance on cbs_list whose qdisc belongs to the notifying device and,
+ * if one exists, recompute its port rate. Always returns NOTIFY_DONE.
+ */
+static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct cbs_sched_data *match = NULL;
+	struct cbs_sched_data *q;
+
+	ASSERT_RTNL();
+
+	if (event != NETDEV_UP && event != NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	/* The list is only walked under cbs_list_lock; the rate update
+	 * itself happens after the lock is dropped.
+	 */
+	spin_lock(&cbs_list_lock);
+	list_for_each_entry(q, &cbs_list, cbs_list) {
+		if (qdisc_dev(q->qdisc) == dev) {
+			match = q;
+			break;
+		}
+	}
+	spin_unlock(&cbs_list_lock);
+
+	if (match)
+		cbs_set_port_rate(dev, match);
+
+	return NOTIFY_DONE;
+}
+
static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
qopt = nla_data(tb[TCA_CBS_PARMS]);
if (!qopt->offload) {
- struct ethtool_link_ksettings ecmd;
- s64 link_speed;
-
- if (!__ethtool_get_link_ksettings(dev, &ecmd))
- link_speed = ecmd.base.speed;
- else
- link_speed = SPEED_1000;
-
- q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
-
+ cbs_set_port_rate(dev, q);
cbs_disable_offload(dev, q);
} else {
err = cbs_enable_offload(dev, q, qopt, extack);
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ int err;
if (!opt) {
NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
qdisc_watchdog_init(&q->watchdog, sch);
- return cbs_change(sch, opt, extack);
+ err = cbs_change(sch, opt, extack);
+ if (err)
+ return err;
+
+ if (!q->offload) {
+ spin_lock(&cbs_list_lock);
+ list_add(&q->cbs_list, &cbs_list);
+ spin_unlock(&cbs_list_lock);
+ }
+
+ return 0;
}
static void cbs_destroy(struct Qdisc *sch)
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- qdisc_watchdog_cancel(&q->watchdog);
+ spin_lock(&cbs_list_lock);
+ list_del(&q->cbs_list);
+ spin_unlock(&cbs_list_lock);
+ qdisc_watchdog_cancel(&q->watchdog);
cbs_disable_offload(dev, q);
if (q->qdisc)
.owner = THIS_MODULE,
};
+/* Notifier block that routes netdevice events to cbs_dev_notifier(). */
+static struct notifier_block cbs_device_notifier = {
+ .notifier_call = cbs_dev_notifier,
+};
+
+/* Module init: register the netdevice notifier, then the qdisc ops.
+ * Returns 0 on success or the error from whichever registration failed.
+ */
static int __init cbs_module_init(void)
{
+	int err;
+
+	err = register_netdevice_notifier(&cbs_device_notifier);
+	if (err)
+		return err;
+
-	return register_qdisc(&cbs_qdisc_ops);
+	/* Do not leave the notifier registered when the qdisc cannot be. */
+	err = register_qdisc(&cbs_qdisc_ops);
+	if (err)
+		unregister_netdevice_notifier(&cbs_device_notifier);
+
+	return err;
}
+/* Module exit: unregister the qdisc ops, then the netdevice notifier. */
static void __exit cbs_module_exit(void)
{
unregister_qdisc(&cbs_qdisc_ops);
+ unregister_netdevice_notifier(&cbs_device_notifier);
}
module_init(cbs_module_init)
module_exit(cbs_module_exit)
skb = __skb_dequeue(&q->skb_bad_txq);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_atomic_qlen_dec(q);
+ qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_atomic_qlen_inc(q);
+ qdisc_qstats_cpu_qlen_inc(q);
} else {
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
spin_unlock(lock);
}
-static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- while (skb) {
- struct sk_buff *next = skb->next;
-
- __skb_queue_tail(&q->gso_skb, skb);
- q->qstats.requeues++;
- qdisc_qstats_backlog_inc(q, skb);
- q->q.qlen++; /* it's still part of the queue */
+ spinlock_t *lock = NULL;
- skb = next;
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
}
- __netif_schedule(q);
-
- return 0;
-}
-static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
-{
- spinlock_t *lock = qdisc_lock(q);
-
- spin_lock(lock);
while (skb) {
struct sk_buff *next = skb->next;
__skb_queue_tail(&q->gso_skb, skb);
- qdisc_qstats_cpu_requeues_inc(q);
- qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_atomic_qlen_inc(q);
+ /* it's still part of the queue */
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_requeues_inc(q);
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ q->qstats.requeues++;
+ qdisc_qstats_backlog_inc(q, skb);
+ q->q.qlen++;
+ }
skb = next;
}
- spin_unlock(lock);
-
+ if (lock)
+ spin_unlock(lock);
__netif_schedule(q);
-
- return 0;
-}
-
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
-{
- if (q->flags & TCQ_F_NOLOCK)
- return dev_requeue_skb_locked(skb, q);
- else
- return __dev_requeue_skb(skb, q);
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
skb = __skb_dequeue(&q->gso_skb);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_atomic_qlen_dec(q);
+ qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
if (unlikely(err))
return qdisc_drop_cpu(skb, qdisc, to_free);
- qdisc_qstats_atomic_qlen_inc(qdisc);
- /* Note: skb can not be used after skb_array_produce(),
- * so we better not use qdisc_qstats_cpu_backlog_inc()
- */
- this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+ qdisc_update_stats_at_enqueue(qdisc, pkt_len);
return NET_XMIT_SUCCESS;
}
skb = __skb_array_consume(q);
}
if (likely(skb)) {
- qdisc_qstats_cpu_backlog_dec(qdisc, skb);
- qdisc_bstats_cpu_update(qdisc, skb);
- qdisc_qstats_atomic_qlen_dec(qdisc);
+ qdisc_update_stats_at_dequeue(qdisc, skb);
} else {
qdisc->empty = true;
}
struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
q->backlog = 0;
+ q->qlen = 0;
}
}
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <linux/math64.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+static LIST_HEAD(taprio_list);
+static DEFINE_SPINLOCK(taprio_list_lock);
+
#define TAPRIO_ALL_GATES_OPEN -1
struct sched_entry {
struct Qdisc *root;
s64 base_time;
int clockid;
- int picos_per_byte; /* Using picoseconds because for 10Gbps+
- * speeds it's sub-nanoseconds per byte
- */
+ atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
+ * speeds it's sub-nanoseconds per byte
+ */
size_t num_entries;
/* Protects the update side of the RCU protected current_entry */
struct list_head entries;
ktime_t (*get_time)(void);
struct hrtimer advance_timer;
+ struct list_head taprio_list;
};
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
- gate_mask = entry ? entry->gate_mask : -1;
+ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
rcu_read_unlock();
if (!gate_mask)
tc = netdev_get_prio_tc_map(dev, prio);
if (!(gate_mask & BIT(tc)))
- return NULL;
+ continue;
return skb;
}
static inline int length_to_duration(struct taprio_sched *q, int len)
{
- return (len * q->picos_per_byte) / 1000;
+ return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
+}
+
+/* Set @entry's budget to entry->interval * 1000 / q->picos_per_byte, i.e.
+ * the number of bytes that fit into the entry's interval at the current
+ * per-byte cost (interval is presumably in nanoseconds, hence the * 1000
+ * to reach picoseconds -- TODO confirm).
+ */
+static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
+{
+ atomic_set(&entry->budget,
+ div64_u64((u64)entry->interval * 1000,
+ atomic64_read(&q->picos_per_byte)));
}
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
u32 gate_mask;
int i;
+ if (atomic64_read(&q->picos_per_byte) == -1) {
+ WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
+ return NULL;
+ }
+
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
/* if there's no entry, it means that the schedule didn't
*/
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
ktime_after(guard, entry->close_time))
- return NULL;
+ continue;
/* ... and no budget. */
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
atomic_sub_return(len, &entry->budget) < 0)
- return NULL;
+ continue;
skb = child->ops->dequeue(child);
if (unlikely(!skb))
return NULL;
}
-static bool should_restart_cycle(const struct taprio_sched *q,
- const struct sched_entry *entry)
-{
- WARN_ON(!entry);
-
- return list_is_last(&entry->list, &q->entries);
-}
-
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
struct taprio_sched *q = container_of(timer, struct taprio_sched,
goto first_run;
}
- if (should_restart_cycle(q, entry))
+ if (list_is_last(&entry->list, &q->entries))
next = list_first_entry(&q->entries, struct sched_entry,
list);
else
close_time = ktime_add_ns(entry->close_time, next->interval);
next->close_time = close_time;
- atomic_set(&next->budget,
- (next->interval * 1000) / q->picos_per_byte);
+ taprio_set_budget(q, next);
first_run:
rcu_assign_pointer(q->current_entry, next);
return 0;
}
-static ktime_t taprio_get_start_time(struct Qdisc *sch)
+static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
{
struct taprio_sched *q = qdisc_priv(sch);
struct sched_entry *entry;
s64 n;
base = ns_to_ktime(q->base_time);
- cycle = 0;
+ now = q->get_time();
+
+ if (ktime_after(base, now)) {
+ *start = base;
+ return 0;
+ }
/* Calculate the cycle_time, by summing all the intervals.
*/
+ cycle = 0;
list_for_each_entry(entry, &q->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- if (!cycle)
- return base;
-
- now = q->get_time();
-
- if (ktime_after(base, now))
- return base;
+ /* The qdisc is expected to have at least one sched_entry. Moreover,
+ * any entry must have 'interval' > 0. Thus if the cycle time is zero,
+ * something went really wrong. In that case, we should warn about this
+ * inconsistent state and return error.
+ */
+ if (WARN_ON(!cycle))
+ return -EFAULT;
/* Schedule the start time for the beginning of the next
* cycle.
*/
n = div64_s64(ktime_sub_ns(now, base), cycle);
-
- return ktime_add_ns(base, (n + 1) * cycle);
+ *start = ktime_add_ns(base, (n + 1) * cycle);
+ return 0;
}
static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
list);
first->close_time = ktime_add_ns(start, first->interval);
- atomic_set(&first->budget,
- (first->interval * 1000) / q->picos_per_byte);
+ taprio_set_budget(q, first);
rcu_assign_pointer(q->current_entry, NULL);
spin_unlock_irqrestore(&q->current_entry_lock, flags);
hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}
+/* Recompute q->picos_per_byte from the link speed ethtool reports for @dev
+ * and publish it atomically. -1 is stored when the speed cannot be
+ * determined; taprio_dequeue() checks for that value.
+ */
+static void taprio_set_picos_per_byte(struct net_device *dev,
+				      struct taprio_sched *q)
+{
+	struct ethtool_link_ksettings ecmd;
+	int speed = SPEED_UNKNOWN;
+	int picos_per_byte = -1;
+
+	/* Only read ecmd when the query succeeded; it is uninitialized
+	 * otherwise (this also keeps the debug print below well-defined).
+	 */
+	if (!__ethtool_get_link_ksettings(dev, &ecmd))
+		speed = ecmd.base.speed;
+
+	/* Widen before scaling: speed * 1000 * 1000 wraps in 32 bits once the
+	 * reported speed exceeds ~4294 (i.e. from 10 Gbit/s links upward).
+	 * A zero speed is treated as unknown to avoid dividing by zero.
+	 */
+	if (speed != SPEED_UNKNOWN && speed > 0)
+		picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+					   (s64)speed * 1000 * 1000);
+
+	atomic64_set(&q->picos_per_byte, picos_per_byte);
+	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
+		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
+		   speed);
+}
+
+/* Netdevice notifier callback. On NETDEV_UP or NETDEV_CHANGE, look up the
+ * taprio instance on taprio_list whose root qdisc belongs to the notifying
+ * device and, if one exists, recompute its picos_per_byte. Always returns
+ * NOTIFY_DONE.
+ */
+static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
+			       void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct taprio_sched *match = NULL;
+	struct taprio_sched *q;
+
+	ASSERT_RTNL();
+
+	if (event != NETDEV_UP && event != NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	/* The list is only walked under taprio_list_lock; the update itself
+	 * happens after the lock is dropped.
+	 */
+	spin_lock(&taprio_list_lock);
+	list_for_each_entry(q, &taprio_list, taprio_list) {
+		if (qdisc_dev(q->root) == dev) {
+			match = q;
+			break;
+		}
+	}
+	spin_unlock(&taprio_list_lock);
+
+	if (match)
+		taprio_set_picos_per_byte(dev, match);
+
+	return NOTIFY_DONE;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- struct ethtool_link_ksettings ecmd;
int i, err, size;
- s64 link_speed;
ktime_t start;
err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
if (err < 0)
return err;
- err = -EINVAL;
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
mqprio->prio_tc_map[i]);
}
- if (!__ethtool_get_link_ksettings(dev, &ecmd))
- link_speed = ecmd.base.speed;
- else
- link_speed = SPEED_1000;
-
- q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
- link_speed * 1000 * 1000);
+ taprio_set_picos_per_byte(dev, q);
- start = taprio_get_start_time(sch);
- if (!start)
- return 0;
+ err = taprio_get_start_time(sch, &start);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
+ return err;
+ }
taprio_start_sched(sch, start);
struct sched_entry *entry, *n;
unsigned int i;
+ spin_lock(&taprio_list_lock);
+ list_del(&q->taprio_list);
+ spin_unlock(&taprio_list_lock);
+
hrtimer_cancel(&q->advance_timer);
if (q->qdiscs) {
if (!opt)
return -EINVAL;
+ spin_lock(&taprio_list_lock);
+ list_add(&q->taprio_list, &taprio_list);
+ spin_unlock(&taprio_list_lock);
+
return taprio_change(sch, opt, extack);
}
.owner = THIS_MODULE,
};
+/* Notifier block that routes netdevice events to taprio_dev_notifier(). */
+static struct notifier_block taprio_device_notifier = {
+ .notifier_call = taprio_dev_notifier,
+};
+
+/* Module init: register the netdevice notifier, then the qdisc ops.
+ * Returns 0 on success or the error from whichever registration failed.
+ */
static int __init taprio_module_init(void)
{
+	int err;
+
+	err = register_netdevice_notifier(&taprio_device_notifier);
+	if (err)
+		return err;
+
-	return register_qdisc(&taprio_qdisc_ops);
+	/* Do not leave the notifier registered when the qdisc cannot be. */
+	err = register_qdisc(&taprio_qdisc_ops);
+	if (err)
+		unregister_netdevice_notifier(&taprio_device_notifier);
+
+	return err;
}
+/* Module exit: unregister the qdisc ops, then the netdevice notifier. */
static void __exit taprio_module_exit(void)
{
unregister_qdisc(&taprio_qdisc_ops);
+ unregister_netdevice_notifier(&taprio_device_notifier);
}
module_init(taprio_module_init);
.getname = sctp_getname,
.poll = sctp_poll,
.ioctl = inet6_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getname = inet_getname, /* Semantics are different. */
.poll = sctp_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown, /* Looks harmless. */
.setsockopt = sock_common_setsockopt, /* IP_SOL IP_OPTION is a problem */
* in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
* memory usage too much
*/
- if (*sk->sk_prot_creator->memory_pressure) {
+ if (sk_under_memory_pressure(sk)) {
if (sctp_tsnmap_has_gap(map) &&
(sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
pr_debug("%s: under pressure, reneging for tsn:%u\n",
__func__, tsn);
deliver = SCTP_CMD_RENEGE;
- }
+ } else {
+ sk_mem_reclaim(sk);
+ }
}
/*
if (sctp_wspace(asoc) < (int)msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
- if (sctp_wspace(asoc) <= 0) {
+ if (sk_under_memory_pressure(sk))
+ sk_mem_reclaim(sk);
+
+ if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err)
}
/* Validate addr_len before calling common connect/connectx routine. */
- af = sctp_get_af_specific(addr->sa_family);
+ af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL :
+ sctp_get_af_specific(addr->sa_family);
if (!af || addr_len < af->sockaddr_len) {
err = -EINVAL;
} else {
goto do_error;
if (signal_pending(current))
goto do_interrupted;
- if ((int)msg_len <= sctp_wspace(asoc))
+ if (sk_under_memory_pressure(sk))
+ sk_mem_reclaim(sk);
+ if ((int)msg_len <= sctp_wspace(asoc) &&
+ sk_wmem_schedule(sk, msg_len))
break;
/* Let another process have a go. Since we are going
}
static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
- struct sctp_ulpevent *event)
+ struct sk_buff_head *skb_list)
{
- struct sk_buff *skb = sctp_event2skb(event);
struct sock *sk = ulpq->asoc->base.sk;
struct sctp_sock *sp = sctp_sk(sk);
- struct sk_buff_head *skb_list;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
- skb_list = (struct sk_buff_head *)skb->prev;
+ skb = __skb_peek(skb_list);
+ event = sctp_skb2event(skb);
if (sk->sk_shutdown & RCV_SHUTDOWN &&
(sk->sk_shutdown & SEND_SHUTDOWN ||
if (!(event->msg_flags & SCTP_DATA_UNORDERED)) {
event = sctp_intl_reasm(ulpq, event);
- if (event && event->msg_flags & MSG_EOR) {
+ if (event) {
skb_queue_head_init(&temp);
__skb_queue_tail(&temp, sctp_event2skb(event));
- event = sctp_intl_order(ulpq, event);
+ if (event->msg_flags & MSG_EOR)
+ event = sctp_intl_order(ulpq, event);
}
} else {
event = sctp_intl_reasm_uo(ulpq, event);
+ if (event) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ }
}
if (event) {
event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
- sctp_enqueue_event(ulpq, event);
+ sctp_enqueue_event(ulpq, &temp);
}
return event_eor;
static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
{
struct sctp_ulpevent *event;
+ struct sk_buff_head temp;
if (!skb_queue_empty(&ulpq->reasm)) {
do {
event = sctp_intl_retrieve_first(ulpq);
- if (event)
- sctp_enqueue_event(ulpq, event);
+ if (event) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ sctp_enqueue_event(ulpq, &temp);
+ }
} while (event);
}
if (!skb_queue_empty(&ulpq->reasm_uo)) {
do {
event = sctp_intl_retrieve_first_uo(ulpq);
- if (event)
- sctp_enqueue_event(ulpq, event);
+ if (event) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ sctp_enqueue_event(ulpq, &temp);
+ }
} while (event);
}
}
if (event) {
sctp_intl_retrieve_ordered(ulpq, event);
- sctp_enqueue_event(ulpq, event);
+ sctp_enqueue_event(ulpq, &temp);
}
}
ntohl(skip->mid), skip->flags);
}
+/* Adapter used as the .enqueue_event hook: places @event's skb on a
+ * temporary one-entry sk_buff_head and passes that list to
+ * sctp_ulpq_tail_event(), returning its result.
+ */
+static int do_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+{
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ return sctp_ulpq_tail_event(ulpq, &temp);
+}
+
static struct sctp_stream_interleave sctp_stream_interleave_0 = {
.data_chunk_len = sizeof(struct sctp_data_chunk),
.ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk),
.assign_number = sctp_chunk_assign_ssn,
.validate_data = sctp_validate_data,
.ulpevent_data = sctp_ulpq_tail_data,
- .enqueue_event = sctp_ulpq_tail_event,
+ .enqueue_event = do_ulpq_tail_event,
.renege_events = sctp_ulpq_renege,
.start_pd = sctp_ulpq_partial_delivery,
.abort_pd = sctp_ulpq_abort_pd,
.handle_ftsn = sctp_handle_fwdtsn,
};
+/* Adapter used as the .enqueue_event hook: places @event's skb on a
+ * temporary one-entry sk_buff_head and passes that list to
+ * sctp_enqueue_event(), returning its result.
+ */
+static int do_sctp_enqueue_event(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ return sctp_enqueue_event(ulpq, &temp);
+}
+
static struct sctp_stream_interleave sctp_stream_interleave_1 = {
.data_chunk_len = sizeof(struct sctp_idata_chunk),
.ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk),
.assign_number = sctp_chunk_assign_mid,
.validate_data = sctp_validate_idata,
.ulpevent_data = sctp_ulpevent_idata,
- .enqueue_event = sctp_enqueue_event,
+ .enqueue_event = do_sctp_enqueue_event,
.renege_events = sctp_renege_events,
.start_pd = sctp_intl_start_pd,
.abort_pd = sctp_intl_abort_pd,
gfp_t gfp)
{
struct sctp_ulpevent *event = NULL;
- struct sk_buff *skb;
- size_t padding, len;
+ struct sk_buff *skb = chunk->skb;
+ struct sock *sk = asoc->base.sk;
+ size_t padding, datalen;
int rx_count;
/*
if (asoc->ep->rcvbuf_policy)
rx_count = atomic_read(&asoc->rmem_alloc);
else
- rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+ rx_count = atomic_read(&sk->sk_rmem_alloc);
- if (rx_count >= asoc->base.sk->sk_rcvbuf) {
+ datalen = ntohs(chunk->chunk_hdr->length);
- if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
- (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
- chunk->skb->truesize)))
- goto fail;
- }
+ if (rx_count >= sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, datalen))
+ goto fail;
/* Clone the original skb, sharing the data. */
skb = skb_clone(chunk->skb, gfp);
* The sender should never pad with more than 3 bytes. The receiver
* MUST ignore the padding bytes.
*/
- len = ntohs(chunk->chunk_hdr->length);
- padding = SCTP_PAD4(len) - len;
+ padding = SCTP_PAD4(datalen) - datalen;
/* Fixup cloned skb with just this chunks data. */
skb_trim(skb, chunk->chunk_end - padding - skb->data);
event = sctp_ulpq_reasm(ulpq, event);
/* Do ordering if needed. */
- if ((event) && (event->msg_flags & MSG_EOR)) {
+ if (event) {
/* Create a temporary list to collect chunks on. */
skb_queue_head_init(&temp);
__skb_queue_tail(&temp, sctp_event2skb(event));
- event = sctp_ulpq_order(ulpq, event);
+ if (event->msg_flags & MSG_EOR)
+ event = sctp_ulpq_order(ulpq, event);
}
/* Send event to the ULP. 'event' is the sctp_ulpevent for
*/
if (event) {
event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
- sctp_ulpq_tail_event(ulpq, event);
+ sctp_ulpq_tail_event(ulpq, &temp);
}
return event_eor;
return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
}
-/* If the SKB of 'event' is on a list, it is the first such member
- * of that list.
- */
-int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list)
{
struct sock *sk = ulpq->asoc->base.sk;
struct sctp_sock *sp = sctp_sk(sk);
- struct sk_buff_head *queue, *skb_list;
- struct sk_buff *skb = sctp_event2skb(event);
+ struct sctp_ulpevent *event;
+ struct sk_buff_head *queue;
+ struct sk_buff *skb;
int clear_pd = 0;
- skb_list = (struct sk_buff_head *) skb->prev;
+ skb = __skb_peek(skb_list);
+ event = sctp_skb2event(skb);
/* If the socket is just going to throw this away, do not
* even try to deliver it.
}
}
- /* If we are harvesting multiple skbs they will be
- * collected on a list.
- */
- if (skb_list)
- skb_queue_splice_tail_init(skb_list, queue);
- else
- __skb_queue_tail(queue, skb);
+ skb_queue_splice_tail_init(skb_list, queue);
/* Did we just complete partial delivery and need to get
* rolling again? Move pending data to the receive
static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq)
{
struct sctp_ulpevent *event = NULL;
- struct sk_buff_head temp;
if (skb_queue_empty(&ulpq->reasm))
return;
while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) {
- /* Do ordering if needed. */
- if ((event) && (event->msg_flags & MSG_EOR)) {
- skb_queue_head_init(&temp);
- __skb_queue_tail(&temp, sctp_event2skb(event));
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ /* Do ordering if needed. */
+ if (event->msg_flags & MSG_EOR)
event = sctp_ulpq_order(ulpq, event);
- }
/* Send event to the ULP. 'event' is the
* sctp_ulpevent for very first SKB on the temp' list.
*/
if (event)
- sctp_ulpq_tail_event(ulpq, event);
+ sctp_ulpq_tail_event(ulpq, &temp);
}
}
if (event) {
/* see if we have more ordered that we can deliver */
sctp_ulpq_retrieve_ordered(ulpq, event);
- sctp_ulpq_tail_event(ulpq, event);
+ sctp_ulpq_tail_event(ulpq, &temp);
}
}
event = sctp_ulpq_retrieve_first(ulpq);
/* Send event to the ULP. */
if (event) {
- sctp_ulpq_tail_event(ulpq, event);
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ sctp_ulpq_tail_event(ulpq, &temp);
sctp_ulpq_set_pd(ulpq);
return;
}
freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
}
/* If able to free enough room, accept this chunk. */
- if (freed >= needed) {
+ if (sk_rmem_schedule(asoc->base.sk, chunk->skb, needed) &&
+ freed >= needed) {
int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
/*
* Enter partial delivery if chunk has not been
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
- if (smc->connect_info && sk->sk_state == SMC_INIT)
+ if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
- kfree(smc->connect_info);
- smc->connect_info = NULL;
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
if (sk->sk_state == SMC_CLOSED) {
if (smc->clcsock) {
- mutex_lock(&smc->clcsock_release_lock);
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- mutex_unlock(&smc->clcsock_release_lock);
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
}
if (!smc->use_fallback)
smc_conn_free(&smc->conn);
link->peer_mtu = clc->qp_mtu;
}
+/* Switch @smc to fallback mode. If the SMC socket has a struct file
+ * attached, hand that file to the internal clcsock and point the file's
+ * private_data at the clcsock.
+ */
+static void smc_switch_to_fallback(struct smc_sock *smc)
+{
+ smc->use_fallback = true;
+ if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
+ smc->clcsock->file = smc->sk.sk_socket->file;
+ smc->clcsock->file->private_data = smc->clcsock;
+ }
+}
+
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = reason_code;
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
return 0;
mutex_unlock(&smc_client_lgr_pending);
smc_conn_free(&smc->conn);
+ smc->connect_nonblock = 0;
return reason_code;
}
/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
-static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
- u8 *ibport, unsigned short vlan_id, u8 gid[])
+static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
- int reason_code = 0;
-
/* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device
* used for the internal TCP socket
*/
- smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
- gid);
- if (!(*ibdev))
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-
- return reason_code;
+ smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
+ if (!ini->ib_dev)
+ return SMC_CLC_DECL_NOSMCRDEV;
+ return 0;
}
/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
-static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
/* Find ISM device with same PNETID as connecting interface */
- smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
- if (!(*ismdev))
- return SMC_CLC_DECL_CNFERR; /* configuration error */
+ smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
+ if (!ini->ism_dev)
+ return SMC_CLC_DECL_NOSMCDDEV;
return 0;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
- struct smcd_dev *ismdev,
- unsigned short vlan_id)
+ struct smc_init_info *ini)
{
- if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
- return SMC_CLC_DECL_CNFERR;
+ if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
+ return SMC_CLC_DECL_ISMVLANERR;
return 0;
}
* used, the VLAN ID will be registered again during the connection setup.
*/
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
- struct smcd_dev *ismdev,
- unsigned short vlan_id)
+ struct smc_init_info *ini)
{
if (!is_smcd)
return 0;
- if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+ if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
struct smc_clc_msg_accept_confirm *aclc,
- struct smc_ib_device *ibdev, u8 ibport,
- u8 gid[], struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
int rc = 0;
/* do inband token exchange */
- rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
+ rc = smc_clc_send_proposal(smc, smc_type, ini);
if (rc)
return rc;
/* receive SMC Accept CLC message */
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
- struct smc_ib_device *ibdev, u8 ibport)
+ struct smc_init_info *ini)
{
- int local_contact = SMC_FIRST_CONTACT;
struct smc_link *link;
int reason_code = 0;
+ ini->is_smcd = false;
+ ini->ib_lcl = &aclc->lcl;
+ ini->ib_clcqpn = ntoh24(aclc->qpn);
+ ini->srv_first_contact = aclc->hdr.flag;
+
mutex_lock(&smc_client_lgr_pending);
- local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, ntoh24(aclc->qpn), &aclc->lcl,
- NULL, 0);
- if (local_contact < 0) {
- if (local_contact == -ENOMEM)
- reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- else if (local_contact == -ENOLINK)
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
- else
- reason_code = SMC_CLC_DECL_INTERR; /* other error */
+ reason_code = smc_conn_create(smc, ini);
+ if (reason_code) {
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+ return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ ini->cln_first_contact);
- if (local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
if (smc_rmb_rtoken_handling(&smc->conn, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
- local_contact);
+ ini->cln_first_contact);
smc_close_init(smc);
smc_rx_init(smc);
- if (local_contact == SMC_FIRST_CONTACT) {
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (smc_ib_ready_link(link))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
- local_contact);
+ ini->cln_first_contact);
} else {
if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
- local_contact);
+ ini->cln_first_contact);
}
smc_rmb_sync_sg_for_device(&smc->conn);
reason_code = smc_clc_send_confirm(smc);
if (reason_code)
- return smc_connect_abort(smc, reason_code, local_contact);
+ return smc_connect_abort(smc, reason_code,
+ ini->cln_first_contact);
smc_tx_init(smc);
- if (local_contact == SMC_FIRST_CONTACT) {
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(smc);
if (reason_code)
return smc_connect_abort(smc, reason_code,
- local_contact);
+ ini->cln_first_contact);
}
mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
- struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
- int local_contact = SMC_FIRST_CONTACT;
int rc = 0;
+ ini->is_smcd = true;
+ ini->ism_gid = aclc->gid;
+ ini->srv_first_contact = aclc->hdr.flag;
+
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
- NULL, ismdev, aclc->gid);
- if (local_contact < 0) {
+ rc = smc_conn_create(smc, ini);
+ if (rc) {
mutex_unlock(&smc_server_lgr_pending);
- return SMC_CLC_DECL_MEM;
+ return rc;
}
/* Create send and receive buffers */
if (smc_buf_create(smc, true))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+ return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ ini->cln_first_contact);
smc_conn_save_peer_info(smc, aclc);
smc_close_init(smc);
rc = smc_clc_send_confirm(smc);
if (rc)
- return smc_connect_abort(smc, rc, local_contact);
+ return smc_connect_abort(smc, rc, ini->cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
{
bool ism_supported = false, rdma_supported = false;
struct smc_clc_msg_accept_confirm aclc;
- struct smc_ib_device *ibdev;
- struct smcd_dev *ismdev;
- u8 gid[SMC_GID_SIZE];
- unsigned short vlan;
+ struct smc_init_info ini = {0};
int smc_type;
int rc = 0;
- u8 ibport;
sock_hold(&smc->sk); /* sock put in passive closing */
if (using_ipsec(smc))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
- /* check for VLAN ID */
- if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
+ return smc_connect_decline_fallback(smc,
+ SMC_CLC_DECL_GETVLANERR);
/* check if there is an ism device available */
- if (!smc_check_ism(smc, &ismdev) &&
- !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+ if (!smc_find_ism_device(smc, &ini) &&
+ !smc_connect_ism_vlan_setup(smc, &ini)) {
/* ISM is supported for this connection */
ism_supported = true;
smc_type = SMC_TYPE_D;
}
/* check if there is a rdma device available */
- if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
+ if (!smc_find_rdma_device(smc, &ini)) {
/* RDMA is supported for this connection */
rdma_supported = true;
if (ism_supported)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
/* perform CLC handshake */
- rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
+ rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
/* depending on previous steps, connect using rdma or ism */
if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
- rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+ rc = smc_connect_rdma(smc, &aclc, &ini);
else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
- rc = smc_connect_ism(smc, &aclc, ismdev);
+ rc = smc_connect_ism(smc, &aclc, &ini);
else
rc = SMC_CLC_DECL_MODEUNSUPP;
if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return 0;
}
{
struct smc_sock *smc = container_of(work, struct smc_sock,
connect_work);
- int rc;
+ long timeo = smc->sk.sk_sndtimeo;
+ int rc = 0;
- lock_sock(&smc->sk);
- rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
- smc->connect_info->alen, smc->connect_info->flags);
+ if (!timeo)
+ timeo = MAX_SCHEDULE_TIMEOUT;
+ lock_sock(smc->clcsock->sk);
if (smc->clcsock->sk->sk_err) {
smc->sk.sk_err = smc->clcsock->sk->sk_err;
- goto out;
- }
- if (rc < 0) {
- smc->sk.sk_err = -rc;
+ } else if ((1 << smc->clcsock->sk->sk_state) &
+ (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ /* TCP handshake of the clcsock is still pending: wait for it.
+ * (1 << sk_state) is a bit mask, so it has to be compared with
+ * the TCPF_* flag values and not with the TCP_* state numbers.
+ */
+ rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
+ if ((rc == -EPIPE) &&
+ ((1 << smc->clcsock->sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
+ rc = 0;
+ }
+ release_sock(smc->clcsock->sk);
+ lock_sock(&smc->sk);
+ if (rc != 0 || smc->sk.sk_err) {
+ smc->sk.sk_state = SMC_CLOSED;
+ if (rc == -EPIPE || rc == -EAGAIN)
+ smc->sk.sk_err = EPIPE;
+ else if (signal_pending(current))
+ smc->sk.sk_err = -sock_intr_errno(timeo);
goto out;
}
smc->sk.sk_err = -rc;
out:
- if (smc->sk.sk_err)
- smc->sk.sk_state_change(&smc->sk);
- else
- smc->sk.sk_write_space(&smc->sk);
- kfree(smc->connect_info);
- smc->connect_info = NULL;
+ if (!sock_flag(&smc->sk, SOCK_DEAD)) {
+ if (smc->sk.sk_err) {
+ smc->sk.sk_state_change(&smc->sk);
+ } else { /* allow polling before and after fallback decision */
+ smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
+ smc->sk.sk_write_space(&smc->sk);
+ }
+ }
release_sock(&smc->sk);
}
smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
+ if (smc->connect_nonblock) {
+ rc = -EALREADY;
+ goto out;
+ }
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ if (rc && rc != -EINPROGRESS)
+ goto out;
if (flags & O_NONBLOCK) {
- if (smc->connect_info) {
- rc = -EALREADY;
- goto out;
- }
- smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
- if (!smc->connect_info) {
- rc = -ENOMEM;
- goto out;
- }
- smc->connect_info->alen = alen;
- smc->connect_info->flags = flags ^ O_NONBLOCK;
- memcpy(&smc->connect_info->addr, addr, alen);
- schedule_work(&smc->connect_work);
+ if (schedule_work(&smc->connect_work))
+ smc->connect_nonblock = 1;
rc = -EINPROGRESS;
} else {
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
- if (rc)
- goto out;
-
rc = __smc_connect(smc);
if (rc < 0)
goto out;
if (rc < 0)
lsk->sk_err = -rc;
if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
+ new_sk->sk_prot->unhash(new_sk);
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- new_sk->sk_prot->unhash(new_sk);
sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ new_sk->sk_prot->unhash(new_sk);
if (isk->clcsock) {
sock_release(isk->clcsock);
isk->clcsock = NULL;
}
- new_sk->sk_prot->unhash(new_sk);
sock_put(new_sk); /* final */
continue;
}
- if (new_sock)
+ if (new_sock) {
sock_graft(new_sk, new_sock);
+ if (isk->use_fallback) {
+ smc_sk(new_sk)->clcsock->file = new_sock->file;
+ isk->clcsock->file->private_data = isk->clcsock;
+ }
+ }
return new_sk;
}
return NULL;
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
}
+ sk->sk_prot->unhash(sk);
if (smc->clcsock) {
struct socket *tcp;
smc_conn_free(&smc->conn);
}
release_sock(sk);
- sk->sk_prot->unhash(sk);
sock_put(sk); /* final sock_put */
}
struct smc_sock *lsmc = new_smc->listen_smc;
struct sock *newsmcsk = &new_smc->sk;
- lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
+ release_sock(&lsmc->sk);
} else { /* no longer listening */
smc_close_non_accepted(newsmcsk);
}
- release_sock(&lsmc->sk);
/* Wake up accept */
lsmc->sk.sk_data_ready(&lsmc->sk);
return;
}
smc_conn_free(&new_smc->conn);
- new_smc->use_fallback = true;
+ smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code) < 0) {
}
/* listen worker: check prefixes */
-static int smc_listen_rdma_check(struct smc_sock *new_smc,
+static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
- return SMC_CLC_DECL_CNFERR;
+ return SMC_CLC_DECL_DIFFPREFIX;
return 0;
}
/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
- struct smc_clc_msg_proposal *pclc,
- struct smc_ib_device *ibdev, u8 ibport,
- int *local_contact)
+ struct smc_init_info *ini)
{
+ int rc;
+
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
- &pclc->lcl, NULL, 0);
- if (*local_contact < 0) {
- if (*local_contact == -ENOMEM)
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
- return SMC_CLC_DECL_INTERR; /* other error */
- }
+ rc = smc_conn_create(new_smc, ini);
+ if (rc)
+ return rc;
/* create send buffer and rmb */
if (smc_buf_create(new_smc, false))
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
- struct smcd_dev *ismdev,
- int *local_contact)
+ struct smc_init_info *ini)
{
struct smc_clc_msg_smcd *pclc_smcd;
+ int rc;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
- ismdev, pclc_smcd->gid);
- if (*local_contact < 0) {
- if (*local_contact == -ENOMEM)
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
- return SMC_CLC_DECL_INTERR; /* other error */
- }
+ ini->ism_gid = pclc_smcd->gid;
+ rc = smc_conn_create(new_smc, ini);
+ if (rc)
+ return rc;
/* Check if peer can be reached via ISM device */
if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
- if (*local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
- return SMC_CLC_DECL_CNFERR;
+ return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
- if (*local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_proposal *pclc;
- struct smc_ib_device *ibdev;
+ struct smc_init_info ini = {0};
bool ism_supported = false;
- struct smcd_dev *ismdev;
u8 buf[SMC_CLC_MAX_LEN];
- int local_contact = 0;
- unsigned short vlan;
- int reason_code = 0;
int rc = 0;
- u8 ibport;
+
+ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
+ return smc_listen_out_err(new_smc);
if (new_smc->use_fallback) {
smc_listen_out_connected(new_smc);
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- new_smc->use_fallback = true;
+ smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
smc_listen_out_connected(new_smc);
return;
* wait for and receive SMC Proposal CLC message
*/
pclc = (struct smc_clc_msg_proposal *)&buf;
- reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
- SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
- if (reason_code) {
- smc_listen_decline(new_smc, reason_code, 0);
- return;
- }
+ rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
+ SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
+ if (rc)
+ goto out_decl;
/* IPSec connections opt out of SMC-R optimizations */
if (using_ipsec(new_smc)) {
- smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
- return;
+ rc = SMC_CLC_DECL_IPSEC;
+ goto out_decl;
+ }
+
+ /* check for matching IP prefix and subnet length */
+ rc = smc_listen_prfx_check(new_smc, pclc);
+ if (rc)
+ goto out_decl;
+
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
+ rc = SMC_CLC_DECL_GETVLANERR;
+ goto out_decl;
}
mutex_lock(&smc_server_lgr_pending);
smc_tx_init(new_smc);
/* check if ISM is available */
- if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
- !smc_check_ism(new_smc, &ismdev) &&
- !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
- ism_supported = true;
+ if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
+ ini.is_smcd = true; /* prepare ISM check */
+ rc = smc_find_ism_device(new_smc, &ini);
+ if (!rc)
+ rc = smc_listen_ism_init(new_smc, pclc, &ini);
+ if (!rc)
+ ism_supported = true;
+ else if (pclc->hdr.path == SMC_TYPE_D)
+ goto out_unlock; /* skip RDMA and decline */
}
/* check if RDMA is available */
- if (!ism_supported &&
- ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
- smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
- smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
- smc_listen_rdma_check(new_smc, pclc) ||
- smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
- &local_contact) ||
- smc_listen_rdma_reg(new_smc, local_contact))) {
- /* SMC not supported, decline */
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
- local_contact);
- return;
+ if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
+ /* prepare RDMA check */
+ memset(&ini, 0, sizeof(ini));
+ ini.is_smcd = false;
+ ini.ib_lcl = &pclc->lcl;
+ rc = smc_find_rdma_device(new_smc, &ini);
+ if (rc) {
+ /* no RDMA device found */
+ if (pclc->hdr.path == SMC_TYPE_B)
+ /* neither ISM nor RDMA device found */
+ rc = SMC_CLC_DECL_NOSMCDEV;
+ goto out_unlock;
+ }
+ rc = smc_listen_rdma_init(new_smc, &ini);
+ if (rc)
+ goto out_unlock;
+ rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
+ if (rc)
+ goto out_unlock;
}
/* send SMC Accept CLC message */
- rc = smc_clc_send_accept(new_smc, local_contact);
- if (rc) {
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, rc, local_contact);
- return;
- }
+ rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
+ if (rc)
+ goto out_unlock;
/* SMC-D does not need this lock any more */
if (ism_supported)
mutex_unlock(&smc_server_lgr_pending);
/* receive SMC Confirm CLC message */
- reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
- SMC_CLC_CONFIRM, CLC_WAIT_TIME);
- if (reason_code) {
+ rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
+ SMC_CLC_CONFIRM, CLC_WAIT_TIME);
+ if (rc) {
if (!ism_supported)
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, reason_code, local_contact);
- return;
+ goto out_unlock;
+ goto out_decl;
}
/* finish worker */
if (!ism_supported) {
- rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+ rc = smc_listen_rdma_finish(new_smc, &cclc,
+ ini.cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
if (rc)
return;
}
smc_conn_save_peer_info(new_smc, &cclc);
smc_listen_out_connected(new_smc);
+ return;
+
+out_unlock:
+ mutex_unlock(&smc_server_lgr_pending);
+out_decl:
+ smc_listen_decline(new_smc, rc, ini.cln_first_contact);
}
static void smc_tcp_listen_work(struct work_struct *work)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT) {
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
rc = -EINVAL;
poll_table *wait)
{
struct sock *sk = sock->sk;
- __poll_t mask = 0;
struct smc_sock *smc;
+ __poll_t mask = 0;
if (!sk)
return EPOLLNVAL;
/* delegate to CLC child sock */
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err)
- mask |= EPOLLERR;
} else {
if (sk->sk_state != SMC_CLOSED)
sock_poll_wait(file, sock, wait);
mask |= EPOLLHUP;
if (sk->sk_state == SMC_LISTEN) {
/* woken up by sk_data_ready in smc_listen_work() */
- mask = smc_accept_poll(sk);
+ mask |= smc_accept_poll(sk);
+ } else if (smc->use_fallback) { /* as result of connect_work()*/
+ mask |= smc->clcsock->ops->poll(file, smc->clcsock,
+ wait);
+ sk->sk_err = smc->clcsock->sk->sk_err;
} else {
- if (atomic_read(&smc->conn.sndbuf_space) ||
+ if ((sk->sk_state != SMC_INIT &&
+ atomic_read(&smc->conn.sndbuf_space)) ||
sk->sk_shutdown & SEND_SHUTDOWN) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else {
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT) {
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
if (!smc->use_fallback)
u64 peer_token; /* SMC-D token of peer */
};
-struct smc_connect_info {
- int flags;
- int alen;
- struct sockaddr addr;
-};
-
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
- struct smc_connect_info *connect_info; /* connect address & flags */
struct work_struct connect_work; /* handle non-blocking connect*/
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
* started, waiting for unsent
* data to be sent
*/
+ u8 connect_nonblock : 1;
+ /* non-blocking connect in
+ * flight
+ */
struct mutex clcsock_release_lock;
/* protects clcsock of a listen
* socket
/* send CLC PROPOSAL message across internal TCP socket */
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
- struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
/* add SMC-R specifics */
memcpy(pclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
- memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
- memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
+ memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE);
+ memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
+ ETH_ALEN);
pclc.iparea_offset = htons(0);
}
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
memset(&pclc_smcd, 0, sizeof(pclc_smcd));
plen += sizeof(pclc_smcd);
pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
- pclc_smcd.gid = ismdev->local_gid;
+ pclc_smcd.gid = ini->ism_dev->local_gid;
}
pclc.hdr.length = htons(plen);
#define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */
#define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */
#define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */
-#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found */
+#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */
+#define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */
+#define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */
+#define SMC_CLC_DECL_SMCDNOTALK 0x03030003 /* SMC-D dev can't talk to peer */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
+#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
+#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
+#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
-#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
-#define SMC_CLC_DECL_ERR_RTOK 0x99990001 /* rtoken handling failed */
-#define SMC_CLC_DECL_ERR_RDYLNK 0x99990002 /* ib ready link failed */
-#define SMC_CLC_DECL_ERR_REGRMB 0x99990003 /* reg rmb failed */
+#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
+#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
+#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
+#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
}
struct smcd_dev;
+struct smc_init_info;
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
u8 expected_type, unsigned long timeout);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
- struct smcd_dev *ismdev);
+ struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
+/* release the clcsock that is assigned to the smc_sock
+ *
+ * If the socket has a listen parent (smc->listen_smc is set) and this
+ * function is not running from the socket's own smc_listen_work, that work
+ * item is cancelled synchronously first. Afterwards, with
+ * clcsock_release_lock held, smc->clcsock is set to NULL and the former
+ * clcsock is released; nothing is released if clcsock is already NULL.
+ * NOTE(review): the current_work() check presumably keeps the worker from
+ * waiting on itself in cancel_work_sync() - confirm.
+ */
+void smc_clcsock_release(struct smc_sock *smc)
+{
+ struct socket *tcp;
+
+ if (smc->listen_smc && current_work() != &smc->smc_listen_work)
+ cancel_work_sync(&smc->smc_listen_work);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (smc->clcsock) {
+ tcp = smc->clcsock;
+ smc->clcsock = NULL;
+ sock_release(tcp);
+ }
+ mutex_unlock(&smc->clcsock_release_lock);
+}
+
static void smc_close_cleanup_listen(struct sock *parent)
{
struct sock *sk;
close_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smc_cdc_conn_state_flags *rxflags;
+ bool release_clcsock = false;
struct sock *sk = &smc->sk;
int old_state;
if ((sk->sk_state == SMC_CLOSED) &&
(sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
smc_conn_free(conn);
- if (smc->clcsock) {
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- }
+ if (smc->clcsock)
+ release_clcsock = true;
}
}
release_sock(sk);
+ if (release_clcsock)
+ smc_clcsock_release(smc);
sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_active(struct smc_sock *smc);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
+void smc_clcsock_release(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
- struct smc_ib_device *smcibdev, u8 ibport,
- char *peer_systemid, unsigned short vlan_id,
- struct smcd_dev *smcismdev, u64 peer_gid)
+static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
struct smc_link *lnk;
int rc = 0;
int i;
- if (is_smcd && vlan_id) {
- rc = smc_ism_get_vlan(smcismdev, vlan_id);
- if (rc)
+ if (ini->is_smcd && ini->vlan_id) {
+ if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
+ rc = SMC_CLC_DECL_ISMVLANERR;
goto out;
+ }
}
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
- rc = -ENOMEM;
+ rc = SMC_CLC_DECL_MEM;
goto out;
}
- lgr->is_smcd = is_smcd;
+ lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
- lgr->vlan_id = vlan_id;
+ lgr->vlan_id = ini->vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
lgr->conns_all = RB_ROOT;
- if (is_smcd) {
+ if (ini->is_smcd) {
/* SMC-D specific settings */
- lgr->peer_gid = peer_gid;
- lgr->smcd = smcismdev;
+ lgr->peer_gid = ini->ism_gid;
+ lgr->smcd = ini->ism_dev;
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+ memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+ SMC_SYSTEMID_LEN);
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
- lnk->smcibdev = smcibdev;
- lnk->ibport = ibport;
- lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
- if (!smcibdev->initialized)
- smc_ib_setup_per_ibdev(smcibdev);
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ lnk->path_mtu =
+ ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ if (!ini->ib_dev->initialized)
+ smc_ib_setup_per_ibdev(ini->ib_dev);
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- vlan_id, lnk->gid, &lnk->sgid_index);
+ ini->vlan_id, lnk->gid,
+ &lnk->sgid_index);
if (rc)
goto free_lgr;
rc = smc_llc_link_init(lnk);
free_lgr:
kfree(lgr);
out:
+ if (rc < 0) {
+ if (rc == -ENOMEM)
+ rc = SMC_CLC_DECL_MEM;
+ else
+ rc = SMC_CLC_DECL_INTERR;
+ }
return rc;
}
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
struct net_device *ndev;
int i, nest_lvl, rc = 0;
- *vlan_id = 0;
+ ini->vlan_id = 0;
if (!dst) {
rc = -ENOTCONN;
goto out;
ndev = dst->dev;
if (is_vlan_dev(ndev)) {
- *vlan_id = vlan_dev_vlan_id(ndev);
+ ini->vlan_id = vlan_dev_vlan_id(ndev);
goto out_rel;
}
lower = lower->next;
ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
if (is_vlan_dev(ndev)) {
- *vlan_id = vlan_dev_vlan_id(ndev);
+ ini->vlan_id = vlan_dev_vlan_id(ndev);
break;
}
}
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
- u64 peer_gid)
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
- int local_contact = SMC_FIRST_CONTACT;
struct smc_link_group *lgr;
- unsigned short vlan_id;
enum smc_lgr_role role;
int rc = 0;
+ ini->cln_first_contact = SMC_FIRST_CONTACT;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
- if (rc)
- return rc;
-
- if ((role == SMC_CLNT) && srv_first_contact)
+ if (role == SMC_CLNT && ini->srv_first_contact)
/* create new link group as well */
goto create;
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
- if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
+ if ((ini->is_smcd ?
+ smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
+ smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
- lgr->vlan_id == vlan_id &&
+ lgr->vlan_id == ini->vlan_id &&
(role == SMC_CLNT ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
- local_contact = SMC_REUSE_CONTACT;
+ ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
if (delayed_work_pending(&lgr->free_work))
}
spin_unlock_bh(&smc_lgr_list.lock);
- if (role == SMC_CLNT && !srv_first_contact &&
- (local_contact == SMC_FIRST_CONTACT)) {
+ if (role == SMC_CLNT && !ini->srv_first_contact &&
+ ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* Server reuses a link group, but Client wants to start
* a new one
* send out_of_sync decline, reason synchr. error
*/
- return -ENOLINK;
+ return SMC_CLC_DECL_SYNCERR;
}
create:
- if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
- lcl->id_for_peer, vlan_id, smcd, peer_gid);
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
- if (is_smcd) {
+ if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
}
#endif
out:
- return rc ? rc : local_contact;
+ return rc;
}
/* convert the RMB size into the compressed notation - minimum 16K.
};
};
+struct smc_clc_msg_local;
+
+struct smc_init_info { /* parameters handed through connection setup */
+ u8 is_smcd; /* nonzero: SMC-D setup, zero: SMC-R setup */
+ unsigned short vlan_id; /* set by smc_vlan_by_tcpsk(); 0 if no vlan */
+ int srv_first_contact; /* hdr.flag of the received accept msg */
+ int cln_first_contact; /* set by smc_conn_create():
+ * SMC_FIRST_CONTACT or SMC_REUSE_CONTACT
+ */
+ /* SMC-R */
+ struct smc_clc_msg_local *ib_lcl; /* lcl part of the peer's CLC msg */
+ struct smc_ib_device *ib_dev; /* NULL if no RoCE device was found */
+ u8 ib_gid[SMC_GID_SIZE]; /* filled by smc_ib_determine_gid() */
+ u8 ib_port; /* port of ib_dev chosen by the lookup */
+ u32 ib_clcqpn; /* qpn taken from the accept msg */
+ /* SMC-D */
+ u64 ism_gid; /* gid taken from the peer's CLC msg */
+ struct smcd_dev *ism_dev; /* NULL if no ISM device was found */
+};
+
/* Find the connection associated with the given alert token in the link group.
* To use rbtrees we have to implement our own search core.
* Requires @conns_lock
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
- u64 peer_gid);
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
void smcd_conn_free(struct smc_connection *conn);
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
void smc_core_exit(void);
INIT_LIST_HEAD(&smcd->vlan);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
+ if (!smcd->event_wq) {
+ kfree(smcd->conn);
+ kfree(smcd);
+ return NULL;
+ }
return smcd;
}
EXPORT_SYMBOL_GPL(smcd_alloc_dev);
#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_ism.h"
+#include "smc_core.h"
#define SMC_ASCII_BLANK 32
{
struct net *net = genl_info_net(info);
- return smc_pnet_remove_by_pnetid(net, NULL);
+ smc_pnet_remove_by_pnetid(net, NULL);
+ return 0;
}
/* SMC_PNETID generic netlink operation definition */
* IB device and port
*/
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id, u8 gid[])
+ struct smc_init_info *ini)
{
struct smc_ib_device *ibdev;
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
- NULL)) {
- *smcibdev = ibdev;
- *ibport = i;
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
break;
}
}
* If nothing found, try to use handshake device
*/
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id,
- u8 gid[])
+ struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smc_ib_device *ibdev;
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
- smc_pnet_find_rdma_dev(ndev, smcibdev, ibport, vlan_id, gid);
+ smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
- NULL)) {
- *smcibdev = ibdev;
- *ibport = i;
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
goto out;
}
}
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
- struct smcd_dev **smcismdev)
+ struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smcd_dev *ismdev;
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
- *smcismdev = ismdev;
+ ini->ism_dev = ismdev;
break;
}
}
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
*/
-void smc_pnet_find_roce_resource(struct sock *sk,
- struct smc_ib_device **smcibdev, u8 *ibport,
- unsigned short vlan_id, u8 gid[])
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- *smcibdev = NULL;
- *ibport = 0;
-
+ ini->ib_dev = NULL;
+ ini->ib_port = 0;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
- smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
+ smc_pnet_find_roce_by_pnetid(dst->dev, ini);
out_rel:
dst_release(dst);
return;
}
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- *smcismdev = NULL;
+ ini->ism_dev = NULL;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
- smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+ smc_pnet_find_ism_by_pnetid(dst->dev, ini);
out_rel:
dst_release(dst);
struct smc_ib_device;
struct smcd_dev;
+struct smc_init_info;
/**
* struct smc_pnettable - SMC PNET table anchor
int smc_pnet_net_init(struct net *net);
void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
-void smc_pnet_find_roce_resource(struct sock *sk,
- struct smc_ib_device **smcibdev, u8 *ibport,
- unsigned short vlan_id, u8 gid[]);
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
#endif
err = open_related_ns(&net->ns, get_net_ns);
break;
+ case SIOCGSTAMP_OLD:
+ case SIOCGSTAMPNS_OLD:
+ if (!sock->ops->gettstamp) {
+ err = -ENOIOCTLCMD;
+ break;
+ }
+ err = sock->ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_OLD,
+ !IS_ENABLED(CONFIG_64BIT));
+ break;
+ case SIOCGSTAMP_NEW:
+ case SIOCGSTAMPNS_NEW:
+ if (!sock->ops->gettstamp) {
+ err = -ENOIOCTLCMD;
+ break;
+ }
+ err = sock->ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_NEW,
+ false);
+ break;
default:
err = sock_do_ioctl(net, sock, cmd, arg);
break;
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_COMPAT
-static int do_siocgstamp(struct net *net, struct socket *sock,
- unsigned int cmd, void __user *up)
-{
- mm_segment_t old_fs = get_fs();
- struct timeval ktv;
- int err;
-
- set_fs(KERNEL_DS);
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
- set_fs(old_fs);
- if (!err)
- err = compat_put_timeval(&ktv, up);
-
- return err;
-}
-
-static int do_siocgstampns(struct net *net, struct socket *sock,
- unsigned int cmd, void __user *up)
-{
- mm_segment_t old_fs = get_fs();
- struct timespec kts;
- int err;
-
- set_fs(KERNEL_DS);
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
- set_fs(old_fs);
- if (!err)
- err = compat_put_timespec(&kts, up);
-
- return err;
-}
-
static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
{
struct compat_ifconf ifc32;
case SIOCADDRT:
case SIOCDELRT:
return routing_ioctl(net, sock, cmd, argp);
- case SIOCGSTAMP:
- return do_siocgstamp(net, sock, cmd, argp);
- case SIOCGSTAMPNS:
- return do_siocgstampns(net, sock, cmd, argp);
+ case SIOCGSTAMP_OLD:
+ case SIOCGSTAMPNS_OLD:
+ if (!sock->ops->gettstamp)
+ return -ENOIOCTLCMD;
+ return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
+ !COMPAT_USE_64BIT_TIME);
+
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
case SIOCSHWTSTAMP:
case SIOCADDDLCI:
case SIOCDELDLCI:
case SIOCGSKNS:
+ case SIOCGSTAMP_NEW:
+ case SIOCGSTAMPNS_NEW:
return sock_ioctl(file, cmd, arg);
case SIOCGIFFLAGS:
#include <linux/file.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/init.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/poll.h>
/* We are going to append to the frags_list of head.
* Need to unshare the frag_list.
*/
- if (skb_has_frag_list(head)) {
- err = skb_unclone(head, GFP_ATOMIC);
- if (err) {
- STRP_STATS_INCR(strp->stats.mem_fail);
- desc->error = err;
- return 0;
- }
+ err = skb_unclone(head, GFP_ATOMIC);
+ if (err) {
+ STRP_STATS_INCR(strp->stats.mem_fail);
+ desc->error = err;
+ return 0;
}
if (unlikely(skb_shinfo(head)->frag_list)) {
break;
}
- /* Positive extra indicates ore bytes than needed for the
+ /* Positive extra indicates more bytes than needed for the
* message
*/
}
EXPORT_SYMBOL_GPL(strp_check_rcv);
-static int __init strp_mod_init(void)
+static int __init strp_dev_init(void)
{
strp_wq = create_singlethread_workqueue("kstrp");
if (unlikely(!strp_wq))
return 0;
}
-
-static void __exit strp_mod_exit(void)
-{
- destroy_workqueue(strp_wq);
-}
-module_init(strp_mod_init);
-module_exit(strp_mod_exit);
-MODULE_LICENSE("GPL");
+device_initcall(strp_dev_init);
h->last_refresh = now;
}
+static inline int cache_is_valid(struct cache_head *h);
static void cache_fresh_locked(struct cache_head *head, time_t expiry,
struct cache_detail *detail);
static void cache_fresh_unlocked(struct cache_head *head,
if (cache_is_expired(detail, tmp)) {
hlist_del_init_rcu(&tmp->cache_list);
detail->entries --;
+ if (cache_is_valid(tmp) == -EAGAIN)
+ set_bit(CACHE_NEGATIVE, &tmp->flags);
cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
rpc_task_set_transport(task, clnt);
- call_reserve(task);
}
/*
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
- if (rpc_task_need_resched(task))
- return;
- call_reserveresult(task);
}
static void call_retry_reserve(struct rpc_task *task);
if (status >= 0) {
if (task->tk_rqstp) {
task->tk_action = call_refresh;
- call_refresh(task);
return;
}
/* fall through */
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
- call_retry_reserve(task);
return;
case -EIO: /* probably a shutdown */
break;
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_retry_reserve(task);
- if (rpc_task_need_resched(task))
- return;
- call_reserveresult(task);
}
/*
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
rpcauth_refreshcred(task);
- if (rpc_task_need_resched(task))
- return;
- call_refreshresult(task);
}
/*
case 0:
if (rpcauth_uptodatecred(task)) {
task->tk_action = call_allocate;
- call_allocate(task);
return;
}
/* Use rate-limiting and a max number of retries if refresh
task->tk_cred_retry--;
dprintk("RPC: %5u %s: retry refresh creds\n",
task->tk_pid, __func__);
- call_refresh(task);
return;
}
dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
task->tk_status = 0;
task->tk_action = call_encode;
- if (req->rq_buffer) {
- call_encode(task);
+ if (req->rq_buffer)
return;
- }
if (proc->p_proc != 0) {
BUG_ON(proc->p_arglen == 0);
status = xprt->ops->buf_alloc(task);
xprt_inject_disconnect(xprt);
- if (status == 0) {
- if (rpc_task_need_resched(task))
- return;
- call_encode(task);
+ if (status == 0)
return;
- }
if (status != -ENOMEM) {
rpc_exit(task, status);
return;
xprt_request_enqueue_receive(task);
xprt_request_enqueue_transmit(task);
out:
- task->tk_action = call_bind;
- call_bind(task);
+ task->tk_action = call_transmit;
+ /* Check that the connection is OK */
+ if (!xprt_bound(task->tk_xprt))
+ task->tk_action = call_bind;
+ else if (!xprt_connected(task->tk_xprt))
+ task->tk_action = call_connect;
}
/*
{
xprt_end_transmit(task);
task->tk_action = call_transmit_status;
- call_transmit_status(task);
}
/*
if (xprt_bound(xprt)) {
task->tk_action = call_connect;
- call_connect(task);
return;
}
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_connect;
- call_connect(task);
return;
}
if (xprt_connected(xprt)) {
task->tk_action = call_transmit;
- call_transmit(task);
return;
}
case 0:
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
- call_transmit(task);
return;
}
rpc_exit(task, status);
xprt_transmit(task);
}
xprt_end_transmit(task);
- if (rpc_task_need_resched(task))
- return;
- call_transmit_status(task);
}
/*
* test first.
*/
if (rpc_task_transmitted(task)) {
- if (task->tk_status == 0)
- xprt_request_wait_receive(task);
- if (rpc_task_need_resched(task))
- return;
- call_status(task);
+ task->tk_status = 0;
+ xprt_request_wait_receive(task);
return;
}
{
xprt_request_enqueue_transmit(task);
task->tk_action = call_bc_transmit;
- call_bc_transmit(task);
}
/*
{
struct rpc_rqst *req = task->tk_rqstp;
+ if (rpc_task_transmitted(task))
+ task->tk_status = 0;
+
dprint_status(task);
switch (task->tk_status) {
status = task->tk_status;
if (status >= 0) {
task->tk_action = call_decode;
- call_decode(task);
return;
}
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
- ib_drain_qp(ia->ri_id->qp);
+ ib_drain_rq(ia->ri_id->qp);
drain_workqueue(buf->rb_completion_wq);
/* Deferred Reply processing might have scheduled
__skb_queue_head_init(&list);
l->in_session = false;
+ /* Force re-synch of peer session number before establishing */
+ l->peer_session--;
l->session++;
l->mtu = l->advertised_mtu;
for (; i < TIPC_NAMETBL_SIZE; i++) {
head = &tn->nametbl->services[i];
- if (*last_type) {
+ if (*last_type ||
+ (!i && *last_key && (*last_lower == *last_key))) {
service = tipc_service_find(net, *last_type);
if (!service)
return -EPIPE;
if (n->capabilities == capabilities)
goto exit;
/* Same node may come back with new capabilities */
- write_lock_bh(&n->lock);
+ tipc_node_write_lock(n);
n->capabilities = capabilities;
for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
l = n->links[bearer_id].link;
if (l)
tipc_link_update_caps(l, capabilities);
}
- write_unlock_bh(&n->lock);
+ tipc_node_write_unlock_fast(n);
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
case TIPC_SOCK_RECVQ_DEPTH:
value = skb_queue_len(&sk->sk_receive_queue);
break;
+ case TIPC_SOCK_RECVQ_USED:
+ value = sk_rmem_alloc_get(sk);
+ break;
case TIPC_GROUP_JOIN:
seq.type = 0;
if (tsk->group)
#include <linux/sysctl.h>
+static int zero;
+static int one = 1;
static struct ctl_table_header *tipc_ctl_hdr;
static struct ctl_table tipc_table[] = {
.data = &sysctl_tipc_rmem,
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
},
{
.procname = "named_timeout",
.data = &sysctl_tipc_named_timeout,
.maxlen = sizeof(sysctl_tipc_named_timeout),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "sk_filter",
skb_pull(skb, sizeof(struct udphdr));
hdr = buf_msg(skb);
- rcu_read_lock();
- b = rcu_dereference_rtnl(ub->bearer);
+ b = rcu_dereference(ub->bearer);
if (!b)
- goto rcu_out;
+ goto out;
if (b && test_bit(0, &b->up)) {
tipc_rcv(sock_net(sk), skb, b);
- rcu_read_unlock();
return 0;
}
if (unlikely(msg_user(hdr) == LINK_CONFIG)) {
err = tipc_udp_rcast_disc(b, skb);
if (err)
- goto rcu_out;
+ goto out;
}
-rcu_out:
- rcu_read_unlock();
out:
kfree_skb(skb);
return 0;
static void tls_device_free_ctx(struct tls_context *ctx)
{
- if (ctx->tx_conf == TLS_HW)
+ if (ctx->tx_conf == TLS_HW) {
kfree(tls_offload_ctx_tx(ctx));
+ kfree(ctx->tx.rec_seq);
+ kfree(ctx->tx.iv);
+ }
if (ctx->rx_conf == TLS_HW)
kfree(tls_offload_ctx_rx(ctx));
}
EXPORT_SYMBOL(tls_device_sk_destruct);
+void tls_device_free_resources_tx(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+ tls_free_partial_record(sk, tls_ctx);
+}
+
static void tls_append_frag(struct tls_record_info *record,
struct page_frag *pfrag,
int size)
goto release_netdev;
free_sw_resources:
+ up_read(&device_offload_lock);
tls_sw_free_resources_rx(sk);
+ down_read(&device_offload_lock);
release_ctx:
ctx->priv_ctx_rx = NULL;
release_netdev:
}
out:
up_read(&device_offload_lock);
- kfree(tls_ctx->rx.rec_seq);
- kfree(tls_ctx->rx.iv);
tls_sw_release_resources_rx(sk);
}
static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
{
+ struct sock *sk = skb->sk;
+ int delta;
+
skb_copy_header(nskb, skb);
skb_put(nskb, skb->len);
update_chksum(nskb, headln);
nskb->destructor = skb->destructor;
- nskb->sk = skb->sk;
+ nskb->sk = sk;
skb->destructor = NULL;
skb->sk = NULL;
- refcount_add(nskb->truesize - skb->truesize,
- &nskb->sk->sk_wmem_alloc);
+
+ delta = nskb->truesize - skb->truesize;
+ if (likely(delta < 0))
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+ else if (delta)
+ refcount_add(delta, &sk->sk_wmem_alloc);
}
/* This function may be called after the user socket is already
return tls_push_sg(sk, ctx, sg, offset, flags);
}
+bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
+{
+ struct scatterlist *sg;
+
+ sg = ctx->partially_sent_record;
+ if (!sg)
+ return false;
+
+ while (1) {
+ put_page(sg_page(sg));
+ sk_mem_uncharge(sk, sg->length);
+
+ if (sg_is_last(sg))
+ break;
+ sg++;
+ }
+ ctx->partially_sent_record = NULL;
+ return true;
+}
+
static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
tls_sw_free_resources_tx(sk);
+#ifdef CONFIG_TLS_DEVICE
+ } else if (ctx->tx_conf == TLS_HW) {
+ tls_device_free_resources_tx(sk);
+#endif
}
- if (ctx->rx_conf == TLS_SW) {
- kfree(ctx->rx.rec_seq);
- kfree(ctx->rx.iv);
+ if (ctx->rx_conf == TLS_SW)
tls_sw_free_resources_rx(sk);
- }
#ifdef CONFIG_TLS_DEVICE
if (ctx->rx_conf == TLS_HW)
/* Free up un-sent records in tx_list. First, free
* the partially sent record if any at head of tx_list.
*/
- if (tls_ctx->partially_sent_record) {
- struct scatterlist *sg = tls_ctx->partially_sent_record;
-
- while (1) {
- put_page(sg_page(sg));
- sk_mem_uncharge(sk, sg->length);
-
- if (sg_is_last(sg))
- break;
- sg++;
- }
-
- tls_ctx->partially_sent_record = NULL;
-
+ if (tls_free_partial_record(sk, tls_ctx)) {
rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
list_del(&rec->list);
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ kfree(tls_ctx->rx.rec_seq);
+ kfree(tls_ctx->rx.iv);
+
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb, *last;
long timeo;
+ int skip;
int err;
- int peeked, skip;
err = -EOPNOTSUPP;
if (flags&MSG_OOB)
mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags);
- skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
- &err, &last);
+ skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
+ &last);
if (skb)
break;
.doit = nl80211_associate,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEAUTHENTICATE,
.doit = nl80211_connect,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
.doit = nl80211_update_connect_params,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DISCONNECT,
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMKSA,
.dumpit = nl80211_vendor_cmd_dump,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_SET_QOS_MAP,
.cmd = NL80211_CMD_SET_PMK,
.doit = nl80211_set_pmk,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMK,
return dfs_region1;
}
+static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1,
+ const struct ieee80211_wmm_ac *wmm_ac2,
+ struct ieee80211_wmm_ac *intersect)
+{
+ intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min);
+ intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max);
+ intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot);
+ intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn);
+}
+
/*
* Helper for regdom_intersect(), this does the real
* mathematical intersection fun
struct ieee80211_freq_range *freq_range;
const struct ieee80211_power_rule *power_rule1, *power_rule2;
struct ieee80211_power_rule *power_rule;
+ const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2;
+ struct ieee80211_wmm_rule *wmm_rule;
u32 freq_diff, max_bandwidth1, max_bandwidth2;
freq_range1 = &rule1->freq_range;
power_rule2 = &rule2->power_rule;
power_rule = &intersected_rule->power_rule;
+ wmm_rule1 = &rule1->wmm_rule;
+ wmm_rule2 = &rule2->wmm_rule;
+ wmm_rule = &intersected_rule->wmm_rule;
+
freq_range->start_freq_khz = max(freq_range1->start_freq_khz,
freq_range2->start_freq_khz);
freq_range->end_freq_khz = min(freq_range1->end_freq_khz,
intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms,
rule2->dfs_cac_ms);
+ if (rule1->has_wmm && rule2->has_wmm) {
+ u8 ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ reg_wmm_rules_intersect(&wmm_rule1->client[ac],
+ &wmm_rule2->client[ac],
+ &wmm_rule->client[ac]);
+ reg_wmm_rules_intersect(&wmm_rule1->ap[ac],
+ &wmm_rule2->ap[ac],
+ &wmm_rule->ap[ac]);
+ }
+
+ intersected_rule->has_wmm = true;
+ } else if (rule1->has_wmm) {
+ *wmm_rule = *wmm_rule1;
+ intersected_rule->has_wmm = true;
+ } else if (rule2->has_wmm) {
+ *wmm_rule = *wmm_rule2;
+ intersected_rule->has_wmm = true;
+ } else {
+ intersected_rule->has_wmm = false;
+ }
+
if (!is_valid_reg_rule(intersected_rule))
return -EINVAL;
/* copy subelement as we need to change its content to
* mark an ie after it is processed.
*/
- sub_copy = kmalloc(subie_len, gfp);
+ sub_copy = kmemdup(subelement, subie_len, gfp);
if (!sub_copy)
return 0;
- memcpy(sub_copy, subelement, subie_len);
pos = &new_ie[0];
else if (rate->bw == RATE_INFO_BW_HE_RU &&
rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
result = rates_26[rate->he_gi];
- else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
- rate->bw, rate->he_ru_alloc))
+ else {
+ WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+ rate->bw, rate->he_ru_alloc);
return 0;
+ }
/* now scale to the appropriate MCS */
tmp = result;
break;
}
- case SIOCGSTAMP:
- rc = -EINVAL;
- if (sk)
- rc = sock_get_timestamp(sk,
- (struct timeval __user *)argp);
- break;
- case SIOCGSTAMPNS:
- rc = -EINVAL;
- if (sk)
- rc = sock_get_timestampns(sk,
- (struct timespec __user *)argp);
- break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
unsigned long arg)
{
void __user *argp = compat_ptr(arg);
- struct sock *sk = sock->sk;
-
int rc = -ENOIOCTLCMD;
switch(cmd) {
case TIOCINQ:
rc = x25_ioctl(sock, cmd, (unsigned long)argp);
break;
- case SIOCGSTAMP:
- rc = -EINVAL;
- if (sk)
- rc = compat_sock_get_timestamp(sk,
- (struct timeval __user*)argp);
- break;
- case SIOCGSTAMPNS:
- rc = -EINVAL;
- if (sk)
- rc = compat_sock_get_timestampns(sk,
- (struct timespec __user*)argp);
- break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_x25_ioctl,
#endif
+ .gettstamp = sock_gettstamp,
.listen = x25_listen,
.shutdown = sock_no_shutdown,
.setsockopt = x25_setsockopt,
u64 invalid_descs;
};
+/* The structure of the shared state of the rings is the same as that of
+ * the ring buffer in kernel/events/ring_buffer.c. For the Rx and
+ * completion rings, the kernel is the producer and user space is the
+ * consumer. For the Tx and fill rings, the kernel is the consumer and
+ * user space is the producer.
+ *
+ * producer consumer
+ *
+ * if (LOAD ->consumer) { LOAD ->producer
+ * (A) smp_rmb() (C)
+ * STORE $data LOAD $data
+ * smp_wmb() (B) smp_mb() (D)
+ * STORE ->producer STORE ->consumer
+ * }
+ *
+ * (A) pairs with (D), and (B) pairs with (C).
+ *
+ * Starting with (B), it protects the data from being written after
+ * the producer pointer. If this barrier was missing, the consumer
+ * could observe the producer pointer being set and thus load the data
+ * before the producer has written the new data. The consumer would in
+ * this case load the old data.
+ *
+ * (C) protects the consumer from speculatively loading the data before
+ * the producer pointer actually has been read. If we do not have this
+ * barrier, some architectures could load old data as speculative loads
+ * are not discarded as the CPU does not know there is a dependency
+ * between ->producer and data.
+ *
+ * (A) is a control dependency that separates the load of ->consumer
+ * from the stores of $data. If ->consumer indicates that there is no
+ * room in the buffer, $data is not stored, so no barrier is needed.
+ *
+ * (D) prevents the load of the data from being observed to happen after
+ * the store of the consumer pointer. If we did not have this memory
+ * barrier, the producer could observe the consumer pointer being set
+ * and overwrite the data with a new value before the consumer got the
+ * chance to read the old value. The consumer would thus miss reading
+ * the old entry and very likely read the new entry twice, once right
+ * now and again after circling through the ring.
+ */
+
/* Common functions operating for both RXTX and umem queues */
static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
{
if (q->cons_tail == q->cons_head) {
+ smp_mb(); /* D, matches A */
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
if (xskq_nb_free(q, q->prod_tail, 1) == 0)
return -ENOSPC;
+ /* A, matches D */
ring->desc[q->prod_tail++ & q->ring_mask] = addr;
/* Order producer and data */
- smp_wmb();
+ smp_wmb(); /* B, matches C */
WRITE_ONCE(q->ring->producer, q->prod_tail);
return 0;
if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
return -ENOSPC;
+ /* A, matches D */
ring->desc[q->prod_head++ & q->ring_mask] = addr;
return 0;
}
u32 nb_entries)
{
/* Order producer and data */
- smp_wmb();
+ smp_wmb(); /* B, matches C */
q->prod_tail += nb_entries;
WRITE_ONCE(q->ring->producer, q->prod_tail);
if (xskq_nb_free(q, q->prod_head, 1) == 0)
return -ENOSPC;
+ /* A, matches D */
q->prod_head++;
return 0;
}
struct xdp_desc *desc)
{
if (q->cons_tail == q->cons_head) {
+ smp_mb(); /* D, matches A */
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
- smp_rmb();
+ smp_rmb(); /* C, matches B */
}
return xskq_validate_desc(q, desc);
if (xskq_nb_free(q, q->prod_head, 1) == 0)
return -ENOSPC;
+ /* A, matches D */
idx = (q->prod_head++) & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
static inline void xskq_produce_flush_desc(struct xsk_queue *q)
{
/* Order producer and data */
- smp_wmb();
+ smp_wmb(); /* B, matches C */
q->prod_tail = q->prod_head,
WRITE_ONCE(q->ring->producer, q->prod_tail);
#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
#endif
+#define volatile(x...) volatile("")
#endif
if (!addr)
return;
sym = ksym_search(addr);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ return;
+ }
+
if (PRINT_RAW_ADDR)
printf("%s/%llx;", sym->name, addr);
else
for (i = 0; i < max; i++) {
if (counts[i].ip > PAGE_OFFSET) {
sym = ksym_search(counts[i].ip);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ continue;
+ }
+
printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
counts[i].count);
} else {
bpf_map_lookup_elem(map_fd[0], &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
- printf(" %s", sym->name);
key = next_key;
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ continue;
+ }
+
+ printf(" %s", sym->name);
}
if (key)
printf("\n");
if (!addr)
return;
sym = ksym_search(addr);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ return;
+ }
+
printf("%s;", sym->name);
if (!strcmp(sym->name, "sys_read"))
sys_read_seen = true;
gen-atomic-fallback.sh linux/atomic-fallback.h
EOF
while read script header; do
- ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
+ /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
HASH="$(sha1sum ${LINUXDIR}/include/${header})"
HASH="${HASH%% *}"
printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Author: Kirill Smelkov (kirr@nexedi.com)
+//
+// Search for stream-like files that are using nonseekable_open and convert
+// them to stream_open. A stream-like file is a file that does not use ppos in
+// its read and write. The rationale for the conversion is to avoid a deadlock
+// between read and write.
+
+virtual report
+virtual patch
+virtual explain // explain decisions in the patch (SPFLAGS="-D explain")
+
+// stream-like reader & writer - ones that do not depend on f_pos.
+@ stream_reader @
+identifier readstream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+ ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos)
+ {
+ ... when != ppos
+ }
+
+@ stream_writer @
+identifier writestream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+ ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos)
+ {
+ ... when != ppos
+ }
+
+
+// a function that blocks
+@ blocks @
+identifier block_f;
+identifier wait_event =~ "^wait_event_.*";
+@@
+ block_f(...) {
+ ... when exists
+ wait_event(...)
+ ... when exists
+ }
+
+// stream_reader that can block inside.
+//
+// XXX wait_* can be called indirectly from the current function (e.g. func -> f -> g -> wait())
+// XXX currently reader_blocks supports only direct and 1-level indirect cases.
+@ reader_blocks_direct @
+identifier stream_reader.readstream;
+identifier wait_event =~ "^wait_event_.*";
+@@
+ readstream(...)
+ {
+ ... when exists
+ wait_event(...)
+ ... when exists
+ }
+
+@ reader_blocks_1 @
+identifier stream_reader.readstream;
+identifier blocks.block_f;
+@@
+ readstream(...)
+ {
+ ... when exists
+ block_f(...)
+ ... when exists
+ }
+
+@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @
+identifier stream_reader.readstream;
+@@
+ readstream(...) {
+ ...
+ }
+
+
+// file_operations + whether they have _any_ .read, .write, .llseek ... at all.
+//
+// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c)
+@ fops0 @
+identifier fops;
+@@
+ struct file_operations fops = {
+ ...
+ };
+
+@ has_read @
+identifier fops0.fops;
+identifier read_f;
+@@
+ struct file_operations fops = {
+ .read = read_f,
+ };
+
+@ has_read_iter @
+identifier fops0.fops;
+identifier read_iter_f;
+@@
+ struct file_operations fops = {
+ .read_iter = read_iter_f,
+ };
+
+@ has_write @
+identifier fops0.fops;
+identifier write_f;
+@@
+ struct file_operations fops = {
+ .write = write_f,
+ };
+
+@ has_write_iter @
+identifier fops0.fops;
+identifier write_iter_f;
+@@
+ struct file_operations fops = {
+ .write_iter = write_iter_f,
+ };
+
+@ has_llseek @
+identifier fops0.fops;
+identifier llseek_f;
+@@
+ struct file_operations fops = {
+ .llseek = llseek_f,
+ };
+
+@ has_no_llseek @
+identifier fops0.fops;
+@@
+ struct file_operations fops = {
+ .llseek = no_llseek,
+ };
+
+@ has_mmap @
+identifier fops0.fops;
+identifier mmap_f;
+@@
+ struct file_operations fops = {
+ .mmap = mmap_f,
+ };
+
+@ has_copy_file_range @
+identifier fops0.fops;
+identifier copy_file_range_f;
+@@
+ struct file_operations fops = {
+ .copy_file_range = copy_file_range_f,
+ };
+
+@ has_remap_file_range @
+identifier fops0.fops;
+identifier remap_file_range_f;
+@@
+ struct file_operations fops = {
+ .remap_file_range = remap_file_range_f,
+ };
+
+@ has_splice_read @
+identifier fops0.fops;
+identifier splice_read_f;
+@@
+ struct file_operations fops = {
+ .splice_read = splice_read_f,
+ };
+
+@ has_splice_write @
+identifier fops0.fops;
+identifier splice_write_f;
+@@
+ struct file_operations fops = {
+ .splice_write = splice_write_f,
+ };
+
+
+// file_operations that is candidate for stream_open conversion - it does not
+// use mmap and other methods that assume @offset access to file.
+//
+// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now.
+// XXX maybe_stream.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops".
+@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @
+identifier fops0.fops;
+@@
+ struct file_operations fops = {
+ };
+
+
+// ---- conversions ----
+
+// XXX .open = nonseekable_open -> .open = stream_open
+// XXX .open = func -> openfunc -> nonseekable_open
+
+// read & write
+//
+// if both are used in the same file_operations together with an opener -
+// under those conditions we can use stream_open instead of nonseekable_open.
+@ fops_rw depends on maybe_stream @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+identifier stream_writer.writestream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .read = readstream,
+ .write = writestream,
+ };
+
+@ report_rw depends on report @
+identifier fops_rw.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report && reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+ "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,))
+
+@ script:python depends on report && !reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+
+@ explain_rw_deadlocked depends on explain && reader_blocks @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read & write (was deadlock) */
+ ...>
+ }
+
+
+@ explain_rw_nodeadlock depends on explain && !reader_blocks @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read & write (no direct deadlock) */
+ ...>
+ }
+
+@ patch_rw depends on patch @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// read, but not write
+@ fops_r depends on maybe_stream && !has_write @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .read = readstream,
+ };
+
+@ report_r depends on report @
+identifier fops_r.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_r.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_r depends on explain @
+identifier fops_r.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read only */
+ ...>
+ }
+
+@ patch_r depends on patch @
+identifier fops_r.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// write, but not read
+@ fops_w depends on maybe_stream && !has_read @
+identifier fops0.fops, openfunc;
+identifier stream_writer.writestream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .write = writestream,
+ };
+
+@ report_w depends on report @
+identifier fops_w.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_w.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_w depends on explain @
+identifier fops_w.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* write only */
+ ...>
+ }
+
+@ patch_w depends on patch @
+identifier fops_w.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// no read, no write - don't change anything
info()
{
if [ "${quiet}" != "silent_" ]; then
- printf " %-7s %s\n" ${1} ${2}
+ printf " %-7s %s\n" "${1}" "${2}"
fi
}
fi
}
+# generate .BTF typeinfo from DWARF debuginfo
+gen_btf()
+{
+ local pahole_ver;
+
+ pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
+ if [ "${pahole_ver}" -lt "113" ]; then
+ info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
+ return 0
+ fi
+
+ info "BTF" ${1}
+ LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
+}
# Create ${2} .o file with all symbols from the ${1} object file
kallsyms()
info LD vmlinux
vmlinux_link "${kallsymso}" vmlinux
+if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
+ gen_btf vmlinux
+fi
+
if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then
info SORTEX vmlinux
sortextable vmlinux
bool aa_g_paranoid_load = true;
module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO);
+static int param_get_aaintbool(char *buffer, const struct kernel_param *kp);
+static int param_set_aaintbool(const char *val, const struct kernel_param *kp);
+#define param_check_aaintbool param_check_int
+static const struct kernel_param_ops param_ops_aaintbool = {
+ .set = param_set_aaintbool,
+ .get = param_get_aaintbool
+};
/* Boot time disable flag */
static int apparmor_enabled __lsm_ro_after_init = 1;
-module_param_named(enabled, apparmor_enabled, int, 0444);
+module_param_named(enabled, apparmor_enabled, aaintbool, 0444);
static int __init apparmor_enabled_setup(char *str)
{
return param_get_uint(buffer, kp);
}
+/*
+ * Setter for the "aaintbool" parameter type: the input string is parsed
+ * by param_set_bool() into a local bool, and on success that bool is
+ * stored into the int that kp->arg points to.
+ *
+ * Can only be set before AppArmor is initialized (i.e. on boot cmdline).
+ *
+ * Returns -EPERM once apparmor_initialized is set, otherwise whatever
+ * param_set_bool() returns. The int behind kp->arg is left untouched
+ * when param_set_bool() reports an error.
+ */
+static int param_set_aaintbool(const char *val, const struct kernel_param *kp)
+{
+ struct kernel_param kp_local;
+ bool value;
+ int error;
+
+ if (apparmor_initialized)
+ return -EPERM;
+
+ /* Create local copy, with arg pointing to bool type. */
+ value = !!*((int *)kp->arg);
+ memcpy(&kp_local, kp, sizeof(kp_local));
+ kp_local.arg = &value;
+
+ error = param_set_bool(val, &kp_local);
+ if (!error)
+ *((int *)kp->arg) = *((bool *)kp_local.arg); /* copy the parsed bool back into the int */
+ return error;
+}
+
+/*
+ * To avoid changing /sys/module/apparmor/parameters/enabled from Y/N to
+ * 1/0, this converts the "int that is actually bool" back to bool for
+ * display in the /sys filesystem, while keeping it "int" for the LSM
+ * infrastructure.
+ *
+ * The int behind kp->arg is only read, never written. The return value
+ * is whatever param_get_bool() returns for a local copy of kp whose arg
+ * points at a bool holding the normalised (!!) value.
+ */
+static int param_get_aaintbool(char *buffer, const struct kernel_param *kp)
+{
+ struct kernel_param kp_local;
+ bool value;
+
+ /* Create local copy, with arg pointing to bool type. */
+ value = !!*((int *)kp->arg);
+ memcpy(&kp_local, kp, sizeof(kp_local));
+ kp_local.arg = &value;
+
+ return param_get_bool(buffer, &kp_local);
+}
+
static int param_get_audit(char *buffer, const struct kernel_param *kp)
{
if (!apparmor_enabled)
devcg->behavior == DEVCG_DEFAULT_ALLOW) {
rc = dev_exception_add(devcg, ex);
if (rc)
- break;
+ return rc;
} else {
/*
* in the other possible cases:
*/
int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
- unsigned char *h2, unsigned char h3, ...)
+ unsigned char *h2, unsigned int h3, ...)
{
unsigned char paramdigest[SHA1_DIGEST_SIZE];
struct sdesc *sdesc;
int ret;
va_list argp;
+ if (!chip)
+ return -ENODEV;
+
sdesc = init_sdesc(hashalg);
if (IS_ERR(sdesc)) {
pr_info("trusted_key: can't alloc %s\n", hash_alg);
return PTR_ERR(sdesc);
}
- c = h3;
+ c = !!h3;
ret = crypto_shash_init(&sdesc->shash);
if (ret < 0)
goto out;
va_list argp;
int ret;
+ if (!chip)
+ return -ENODEV;
+
bufsize = LOAD32(buffer, TPM_SIZE_OFFSET);
tag = LOAD16(buffer, 0);
ordinal = command;
{
int rc;
+ if (!chip)
+ return -ENODEV;
+
dump_tpm_buf(cmd);
rc = tpm_send(chip, cmd, buflen);
dump_tpm_buf(cmd);
{
int ret;
+ if (!chip)
+ return -ENODEV;
+
INIT_BUF(tb);
store16(tb, TPM_TAG_RQU_COMMAND);
store32(tb, TPM_OIAP_SIZE);
{
int ret;
+ /* encrypted_keys.ko depends on successful load of this module even if
+ * TPM is not used.
+ */
chip = tpm_default_chip();
if (!chip)
- return -ENOENT;
+ return 0;
+
ret = init_digests();
if (ret < 0)
goto err_put;
static void __exit cleanup_trusted(void)
{
- put_device(&chip->dev);
- kfree(digests);
- trusted_shash_release();
- unregister_key_type(&key_type_trusted);
+ if (chip) {
+ put_device(&chip->dev);
+ kfree(digests);
+ trusted_shash_release();
+ unregister_key_type(&key_type_trusted);
+ }
}
late_initcall(init_trusted);
INIT_LIST_HEAD(&entry->list);
entry->parent = parent;
entry->module = module;
- if (parent)
+ if (parent) {
+ mutex_lock(&parent->access);
list_add_tail(&entry->list, &parent->children);
+ mutex_unlock(&parent->access);
+ }
return entry;
}
list_for_each_entry_safe(p, n, &entry->children, list)
snd_info_free_entry(p);
- list_del(&entry->list);
+ p = entry->parent;
+ if (p) {
+ mutex_lock(&p->access);
+ list_del(&entry->list);
+ mutex_unlock(&p->access);
+ }
kfree(entry->name);
if (entry->private_free)
entry->private_free(entry);
card->shutdown = 1;
spin_unlock(&card->files_lock);
- /* phase 1: disable fops (user space) operations for ALSA API */
- mutex_lock(&snd_card_mutex);
- snd_cards[card->number] = NULL;
- clear_bit(card->number, snd_cards_lock);
- mutex_unlock(&snd_card_mutex);
-
- /* phase 2: replace file->f_op with special dummy operations */
-
+ /* replace file->f_op with special dummy operations */
spin_lock(&card->files_lock);
list_for_each_entry(mfile, &card->files_list, list) {
/* it's critical part, use endless loop */
}
spin_unlock(&card->files_lock);
- /* phase 3: notify all connected devices about disconnection */
+ /* notify all connected devices about disconnection */
/* at this point, they cannot respond to any calls except release() */
#if IS_ENABLED(CONFIG_SND_MIXER_OSS)
device_del(&card->card_dev);
card->registered = false;
}
+
+ /* disable fops (user space) operations for ALSA API */
+ mutex_lock(&snd_card_mutex);
+ snd_cards[card->number] = NULL;
+ clear_bit(card->number, snd_cards_lock);
+ mutex_unlock(&snd_card_mutex);
+
#ifdef CONFIG_PM
wake_up(&card->power_sleep);
#endif
/* fill the info fields */
if (client_info->name[0])
- strlcpy(client->name, client_info->name, sizeof(client->name));
+ strscpy(client->name, client_info->name, sizeof(client->name));
client->filter = client_info->filter;
client->event_lost = client_info->event_lost;
/* set queue name */
if (!info->name[0])
snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue);
- strlcpy(q->name, info->name, sizeof(q->name));
+ strscpy(q->name, info->name, sizeof(q->name));
snd_use_lock_free(&q->use_lock);
return 0;
queuefree(q);
return -EPERM;
}
- strlcpy(q->name, info->name, sizeof(q->name));
+ strscpy(q->name, info->name, sizeof(q->name));
queuefree(q);
return 0;
INIT_LIST_HEAD(&bus->hlink_list);
bus->idx = idx++;
- mutex_init(&bus->lock);
bus->cmd_dma_state = true;
return 0;
INIT_WORK(&bus->unsol_work, snd_hdac_bus_process_unsol_events);
spin_lock_init(&bus->reg_lock);
mutex_init(&bus->cmd_mutex);
+ mutex_init(&bus->lock);
bus->irq = -1;
return 0;
}
dev_dbg(bus->dev, "display power %s\n",
enable ? "enable" : "disable");
+
+ mutex_lock(&bus->lock);
if (enable)
set_bit(idx, &bus->display_power_status);
else
clear_bit(idx, &bus->display_power_status);
if (!acomp || !acomp->ops)
- return;
+ goto unlock;
if (bus->display_power_status) {
if (!bus->display_power_active) {
bus->display_power_active = false;
}
}
+ unlock:
+ mutex_unlock(&bus->lock);
}
EXPORT_SYMBOL_GPL(snd_hdac_display_power);
/* power-up all before initialization */
hda_set_power_state(codec, AC_PWRST_D0);
+ codec->core.dev.power.power_state = PMSG_ON;
snd_hda_codec_proc_new(codec);
SND_PCI_QUIRK(0x8086, 0x2040, "Intel DZ77BH-55K", 0),
/* https://bugzilla.kernel.org/show_bug.cgi?id=199607 */
SND_PCI_QUIRK(0x8086, 0x2057, "Intel NUC5i7RYB", 0),
+ /* https://bugs.launchpad.net/bugs/1821663 */
+ SND_PCI_QUIRK(0x8086, 0x2064, "Intel SDP 8086:2064", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1520902 */
SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0),
/* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */
SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0),
+ /* https://bugs.launchpad.net/bugs/1821663 */
+ SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0),
{}
};
#endif /* CONFIG_PM */
ALC887_FIXUP_BASS_CHMAP,
ALC1220_FIXUP_GB_DUAL_CODECS,
ALC1220_FIXUP_CLEVO_P950,
- ALC1220_FIXUP_SYSTEM76_ORYP5,
- ALC1220_FIXUP_SYSTEM76_ORYP5_PINS,
+ ALC1220_FIXUP_CLEVO_PB51ED,
+ ALC1220_FIXUP_CLEVO_PB51ED_PINS,
};
static void alc889_fixup_coef(struct hda_codec *codec,
static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec,
const struct hda_fixup *fix, int action);
-static void alc1220_fixup_system76_oryp5(struct hda_codec *codec,
+static void alc1220_fixup_clevo_pb51ed(struct hda_codec *codec,
const struct hda_fixup *fix,
int action)
{
.type = HDA_FIXUP_FUNC,
.v.func = alc1220_fixup_clevo_p950,
},
- [ALC1220_FIXUP_SYSTEM76_ORYP5] = {
+ [ALC1220_FIXUP_CLEVO_PB51ED] = {
.type = HDA_FIXUP_FUNC,
- .v.func = alc1220_fixup_system76_oryp5,
+ .v.func = alc1220_fixup_clevo_pb51ed,
},
- [ALC1220_FIXUP_SYSTEM76_ORYP5_PINS] = {
+ [ALC1220_FIXUP_CLEVO_PB51ED_PINS] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
{ 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
{}
},
.chained = true,
- .chain_id = ALC1220_FIXUP_SYSTEM76_ORYP5,
+ .chain_id = ALC1220_FIXUP_CLEVO_PB51ED,
},
};
SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
- SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS),
- SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS),
+ SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
jack->jack->button_state = report;
}
-static void alc295_fixup_chromebook(struct hda_codec *codec,
+static void alc_fixup_headset_jack(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
alc_headset_btn_callback);
snd_hda_jack_add_kctl(codec, 0x55, "Headset Jack", false,
SND_JACK_HEADSET, alc_headset_btn_keymap);
- switch (codec->core.vendor_id) {
- case 0x10ec0295:
- alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */
- alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15);
- break;
- case 0x10ec0236:
- alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */
- alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15);
- break;
- }
break;
case HDA_FIXUP_ACT_INIT:
switch (codec->core.vendor_id) {
}
}
+static void alc295_fixup_chromebook(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ switch (action) {
+ case HDA_FIXUP_ACT_INIT: /* the only action handled; every other action is a no-op here */
+ switch (codec->core.vendor_id) { /* coefficient index differs per codec; unlisted vendor IDs are skipped */
+ case 0x10ec0295:
+ alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD: first write passes bit 15 set under mask 0x8000 (presumably sets it - confirm against alc_update_coef_idx) */
+ alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); /* second write passes 0 under the same mask (presumably clears bit 15 again) */
+ break;
+ case 0x10ec0236:
+ alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD: same two-step sequence, on coef index 0x1b */
+ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15);
+ break;
+ }
+ break;
+ }
+}
+
static void alc_fixup_disable_mic_vref(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
ALC233_FIXUP_ASUS_MIC_NO_PRESENCE,
ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE,
ALC233_FIXUP_LENOVO_MULTI_CODECS,
+ ALC233_FIXUP_ACER_HEADSET_MIC,
ALC294_FIXUP_LENOVO_MIC_LOCATION,
ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE,
ALC700_FIXUP_INTEL_REFERENCE,
ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE,
ALC255_FIXUP_ACER_HEADSET_MIC,
ALC295_FIXUP_CHROME_BOOK,
+ ALC225_FIXUP_HEADSET_JACK,
ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE,
ALC225_FIXUP_WYSE_AUTO_MUTE,
ALC225_FIXUP_WYSE_DISABLE_MIC_VREF,
.type = HDA_FIXUP_FUNC,
.v.func = alc233_alc662_fixup_lenovo_dual_codecs,
},
+ [ALC233_FIXUP_ACER_HEADSET_MIC] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ { 0x20, AC_VERB_SET_COEF_INDEX, 0x45 },
+ { 0x20, AC_VERB_SET_PROC_COEF, 0x5089 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC233_FIXUP_ASUS_MIC_NO_PRESENCE
+ },
[ALC294_FIXUP_LENOVO_MIC_LOCATION] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
[ALC295_FIXUP_CHROME_BOOK] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc295_fixup_chromebook,
+ .chained = true,
+ .chain_id = ALC225_FIXUP_HEADSET_JACK
+ },
+ [ALC225_FIXUP_HEADSET_JACK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_headset_jack,
},
[ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
{.id = ALC255_FIXUP_DUMMY_LINEOUT_VERB, .name = "alc255-dummy-lineout"},
{.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"},
{.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"},
- {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"},
+ {.id = ALC225_FIXUP_HEADSET_JACK, .name = "alc-headset-jack"},
+ {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"},
{.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"},
{}
};
{0x12, 0x90a60140},
{0x14, 0x90170150},
{0x21, 0x02211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x21, 0x02211020}),
SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
{0x14, 0x90170110},
{0x21, 0x02211020}),
{0x21, 0x0221101f}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC256_STANDARD_PINS),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x14, 0x90170110},
+ {0x1b, 0x01011020},
+ {0x21, 0x0221101f}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC,
{0x14, 0x90170110},
{0x1b, 0x90a70130},
tristate "WCD9335 Codec"
depends on SLIMBUS
select REGMAP_SLIMBUS
+ select REGMAP_IRQ
help
The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports
Qualcomm Technologies, Inc. (QTI) multimedia solutions,
dev_err(dai->component->dev,
"%s: ERROR: The device is either a master or a slave.\n",
__func__);
+ /* fall through */
default:
dev_err(dai->component->dev,
"%s: ERROR: Unsupporter master mask 0x%x\n",
return ret;
}
+static int cs35l35_i2c_remove(struct i2c_client *i2c_client)
+{
+ struct cs35l35_private *cs35l35 = i2c_get_clientdata(i2c_client);
+ /*
+ * I2C remove callback: disable the bulk regulator supplies held in
+ * the driver's private data, then set the reset GPIO value to 0.
+ * Always returns 0.
+ */
+ regulator_bulk_disable(cs35l35->num_supplies, cs35l35->supplies);
+ gpiod_set_value_cansleep(cs35l35->reset_gpio, 0);
+
+ return 0;
+}
+
static const struct of_device_id cs35l35_of_match[] = {
{.compatible = "cirrus,cs35l35"},
{},
},
.id_table = cs35l35_id,
.probe = cs35l35_i2c_probe,
+ .remove = cs35l35_i2c_remove,
};
module_i2c_driver(cs35l35_i2c_driver);
.reg_defaults = cs4270_reg_defaults,
.num_reg_defaults = ARRAY_SIZE(cs4270_reg_defaults),
.cache_type = REGCACHE_RBTREE,
+ .write_flag_mask = CS4270_I2C_INCR,
.readable_reg = cs4270_reg_is_readable,
.volatile_reg = cs4270_reg_is_volatile,
struct snd_soc_dai *dai);
static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai);
+static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai);
static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai);
static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai,
.startup = hdac_hda_dai_open,
.shutdown = hdac_hda_dai_close,
.prepare = hdac_hda_dai_prepare,
+ .hw_params = hdac_hda_dai_hw_params,
.hw_free = hdac_hda_dai_hw_free,
.set_tdm_slot = hdac_hda_dai_set_tdm_slot,
};
return 0;
}
+static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_component *component = dai->component;
+ struct hdac_hda_priv *hda_pvt;
+ unsigned int format_val;
+ unsigned int maxbps;
+ /*
+ * hw_params callback: compute the stream format value from the
+ * requested rate/channels/format and store it per DAI and per
+ * stream direction in the component's private data.
+ *
+ * The bits-per-sample limit passed to the format calculation is the
+ * DAI driver's sig_bits for the direction of this substream.
+ *
+ * Returns 0 on success, -EINVAL when the calculated format is 0.
+ */
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ maxbps = dai->driver->playback.sig_bits;
+ else
+ maxbps = dai->driver->capture.sig_bits;
+
+ hda_pvt = snd_soc_component_get_drvdata(component);
+ format_val = snd_hdac_calc_stream_format(params_rate(params),
+ params_channels(params),
+ params_format(params),
+ maxbps,
+ 0);
+ if (!format_val) {
+ dev_err(dai->dev,
+ "invalid format_val, rate=%d, ch=%d, format=%d, maxbps=%d\n",
+ params_rate(params), params_channels(params),
+ params_format(params), maxbps);
+
+ return -EINVAL;
+ }
+ /* Store the value, indexed by DAI id and stream direction. */
+ hda_pvt->pcm[dai->id].format_val[substream->stream] = format_val;
+ return 0;
+}
+
static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
struct snd_soc_dai *dai)
{
struct snd_soc_component *component = dai->component;
+ struct hda_pcm_stream *hda_stream;
struct hdac_hda_priv *hda_pvt;
- struct snd_pcm_runtime *runtime = substream->runtime;
struct hdac_device *hdev;
- struct hda_pcm_stream *hda_stream;
unsigned int format_val;
struct hda_pcm *pcm;
unsigned int stream;
hda_stream = &pcm->stream[substream->stream];
- format_val = snd_hdac_calc_stream_format(runtime->rate,
- runtime->channels,
- runtime->format,
- hda_stream->maxbps,
- 0);
- if (!format_val) {
- dev_err(&hdev->dev,
- "invalid format_val, rate=%d, ch=%d, format=%d\n",
- runtime->rate, runtime->channels, runtime->format);
- return -EINVAL;
- }
-
stream = hda_pvt->pcm[dai->id].stream_tag[substream->stream];
+ format_val = hda_pvt->pcm[dai->id].format_val[substream->stream];
ret = snd_hda_codec_prepare(&hda_pvt->codec, hda_stream,
stream, format_val, substream);
struct hdac_hda_pcm {
int stream_tag[2];
+ unsigned int format_val[2];
};
struct hdac_hda_priv {
params_width(params), params_rate(params),
params_channels(params));
- if (params_width(params) > 24)
- params->msbits = 24;
-
ret = snd_pcm_create_iec958_consumer_hw_params(params, hp.iec.status,
sizeof(hp.iec.status));
if (ret < 0) {
{
struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
struct hdmi_codec_daifmt cf = { 0 };
- int ret = 0;
dev_dbg(dai->dev, "%s()\n", __func__);
- if (dai->id == DAI_ID_SPDIF) {
- cf.fmt = HDMI_SPDIF;
- } else {
- switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
- case SND_SOC_DAIFMT_CBM_CFM:
- cf.bit_clk_master = 1;
- cf.frame_clk_master = 1;
- break;
- case SND_SOC_DAIFMT_CBS_CFM:
- cf.frame_clk_master = 1;
- break;
- case SND_SOC_DAIFMT_CBM_CFS:
- cf.bit_clk_master = 1;
- break;
- case SND_SOC_DAIFMT_CBS_CFS:
- break;
- default:
- return -EINVAL;
- }
+ if (dai->id == DAI_ID_SPDIF)
+ return 0;
+
+ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+ case SND_SOC_DAIFMT_CBM_CFM:
+ cf.bit_clk_master = 1;
+ cf.frame_clk_master = 1;
+ break;
+ case SND_SOC_DAIFMT_CBS_CFM:
+ cf.frame_clk_master = 1;
+ break;
+ case SND_SOC_DAIFMT_CBM_CFS:
+ cf.bit_clk_master = 1;
+ break;
+ case SND_SOC_DAIFMT_CBS_CFS:
+ break;
+ default:
+ return -EINVAL;
+ }
- switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
- case SND_SOC_DAIFMT_NB_NF:
- break;
- case SND_SOC_DAIFMT_NB_IF:
- cf.frame_clk_inv = 1;
- break;
- case SND_SOC_DAIFMT_IB_NF:
- cf.bit_clk_inv = 1;
- break;
- case SND_SOC_DAIFMT_IB_IF:
- cf.frame_clk_inv = 1;
- cf.bit_clk_inv = 1;
- break;
- }
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_NF:
+ break;
+ case SND_SOC_DAIFMT_NB_IF:
+ cf.frame_clk_inv = 1;
+ break;
+ case SND_SOC_DAIFMT_IB_NF:
+ cf.bit_clk_inv = 1;
+ break;
+ case SND_SOC_DAIFMT_IB_IF:
+ cf.frame_clk_inv = 1;
+ cf.bit_clk_inv = 1;
+ break;
+ }
- switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
- case SND_SOC_DAIFMT_I2S:
- cf.fmt = HDMI_I2S;
- break;
- case SND_SOC_DAIFMT_DSP_A:
- cf.fmt = HDMI_DSP_A;
- break;
- case SND_SOC_DAIFMT_DSP_B:
- cf.fmt = HDMI_DSP_B;
- break;
- case SND_SOC_DAIFMT_RIGHT_J:
- cf.fmt = HDMI_RIGHT_J;
- break;
- case SND_SOC_DAIFMT_LEFT_J:
- cf.fmt = HDMI_LEFT_J;
- break;
- case SND_SOC_DAIFMT_AC97:
- cf.fmt = HDMI_AC97;
- break;
- default:
- dev_err(dai->dev, "Invalid DAI interface format\n");
- return -EINVAL;
- }
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ cf.fmt = HDMI_I2S;
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ cf.fmt = HDMI_DSP_A;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ cf.fmt = HDMI_DSP_B;
+ break;
+ case SND_SOC_DAIFMT_RIGHT_J:
+ cf.fmt = HDMI_RIGHT_J;
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ cf.fmt = HDMI_LEFT_J;
+ break;
+ case SND_SOC_DAIFMT_AC97:
+ cf.fmt = HDMI_AC97;
+ break;
+ default:
+ dev_err(dai->dev, "Invalid DAI interface format\n");
+ return -EINVAL;
}
hcp->daifmt[dai->id] = cf;
- return ret;
+ return 0;
}
static int hdmi_codec_digital_mute(struct snd_soc_dai *dai, int mute)
i++;
}
- if (hcd->spdif)
+ if (hcd->spdif) {
hcp->daidrv[i] = hdmi_spdif_dai;
+ hcp->daifmt[DAI_ID_SPDIF].fmt = HDMI_SPDIF;
+ }
dev_set_drvdata(dev, hcp);
SND_SOC_DAPM_MIXER("Mono Mixer", NAU8810_REG_POWER3,
NAU8810_MOUTMX_EN_SFT, 0, &nau8810_mono_mixer_controls[0],
ARRAY_SIZE(nau8810_mono_mixer_controls)),
- SND_SOC_DAPM_DAC("DAC", "HiFi Playback", NAU8810_REG_POWER3,
+ SND_SOC_DAPM_DAC("DAC", "Playback", NAU8810_REG_POWER3,
NAU8810_DAC_EN_SFT, 0),
- SND_SOC_DAPM_ADC("ADC", "HiFi Capture", NAU8810_REG_POWER2,
+ SND_SOC_DAPM_ADC("ADC", "Capture", NAU8810_REG_POWER2,
NAU8810_ADC_EN_SFT, 0),
SND_SOC_DAPM_PGA("SpkN Out", NAU8810_REG_POWER3,
NAU8810_NSPK_EN_SFT, 0, NULL, 0),
SND_SOC_DAPM_ADC("ADCR", NULL, NAU8824_REG_ANALOG_ADC_2,
NAU8824_ADCR_EN_SFT, 0),
- SND_SOC_DAPM_AIF_OUT("AIFTX", "HiFi Capture", 0, SND_SOC_NOPM, 0, 0),
- SND_SOC_DAPM_AIF_IN("AIFRX", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_AIF_IN("AIFRX", "Playback", 0, SND_SOC_NOPM, 0, 0),
SND_SOC_DAPM_DAC("DACL", NULL, NAU8824_REG_RDAC,
NAU8824_DACL_EN_SFT, 0),
}
}
+static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin)
+{
+ struct snd_soc_dapm_context *dapm = nau8824->dapm;
+ const char *prefix = dapm->component->name_prefix;
+ char prefixed_pin[80];
+ /*
+ * Disable a DAPM pin by name. When the component has a name_prefix,
+ * the pin is looked up as "<prefix> <pin>"; otherwise the bare name
+ * is used. The prefixed name is built with snprintf() into an
+ * 80-byte buffer, so longer names are truncated.
+ */
+ if (prefix) {
+ snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s",
+ prefix, pin);
+ snd_soc_dapm_disable_pin(dapm, prefixed_pin);
+ } else {
+ snd_soc_dapm_disable_pin(dapm, pin);
+ }
+}
+
+static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin)
+{
+ struct snd_soc_dapm_context *dapm = nau8824->dapm;
+ const char *prefix = dapm->component->name_prefix;
+ char prefixed_pin[80];
+ /*
+ * Force-enable a DAPM pin by name (snd_soc_dapm_force_enable_pin).
+ * When the component has a name_prefix, the pin is looked up as
+ * "<prefix> <pin>"; otherwise the bare name is used. The prefixed
+ * name is built with snprintf() into an 80-byte buffer, so longer
+ * names are truncated.
+ */
+ if (prefix) {
+ snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s",
+ prefix, pin);
+ snd_soc_dapm_force_enable_pin(dapm, prefixed_pin);
+ } else {
+ snd_soc_dapm_force_enable_pin(dapm, pin);
+ }
+}
+
static void nau8824_eject_jack(struct nau8824 *nau8824)
{
struct snd_soc_dapm_context *dapm = nau8824->dapm;
/* Clear all interruption status */
nau8824_int_status_clear_all(regmap);
- snd_soc_dapm_disable_pin(dapm, "SAR");
- snd_soc_dapm_disable_pin(dapm, "MICBIAS");
+ nau8824_dapm_disable_pin(nau8824, "SAR");
+ nau8824_dapm_disable_pin(nau8824, "MICBIAS");
snd_soc_dapm_sync(dapm);
/* Enable the insertion interruption, disable the ejection
struct regmap *regmap = nau8824->regmap;
int adc_value, event = 0, event_mask = 0;
- snd_soc_dapm_force_enable_pin(dapm, "MICBIAS");
- snd_soc_dapm_force_enable_pin(dapm, "SAR");
+ nau8824_dapm_enable_pin(nau8824, "MICBIAS");
+ nau8824_dapm_enable_pin(nau8824, "SAR");
snd_soc_dapm_sync(dapm);
msleep(100);
if (adc_value < HEADSET_SARADC_THD) {
event |= SND_JACK_HEADPHONE;
- snd_soc_dapm_disable_pin(dapm, "SAR");
- snd_soc_dapm_disable_pin(dapm, "MICBIAS");
+ nau8824_dapm_disable_pin(nau8824, "SAR");
+ nau8824_dapm_disable_pin(nau8824, "MICBIAS");
snd_soc_dapm_sync(dapm);
} else {
event |= SND_JACK_HEADSET;
int jack_insert)
{
struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
- struct snd_soc_dapm_context *dapm =
- snd_soc_component_get_dapm(component);
unsigned int val, count;
if (jack_insert) {
- snd_soc_dapm_force_enable_pin(dapm, "CBJ Power");
- snd_soc_dapm_sync(dapm);
+
+ snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1,
+ RT5682_PWR_VREF2 | RT5682_PWR_MB,
+ RT5682_PWR_VREF2 | RT5682_PWR_MB);
+ snd_soc_component_update_bits(component,
+ RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0);
+ usleep_range(15000, 20000);
+ snd_soc_component_update_bits(component,
+ RT5682_PWR_ANLG_1, RT5682_PWR_FV2, RT5682_PWR_FV2);
+ snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
+ RT5682_PWR_CBJ, RT5682_PWR_CBJ);
+
snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH);
rt5682_enable_push_button_irq(component, false);
snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
- snd_soc_dapm_disable_pin(dapm, "CBJ Power");
- snd_soc_dapm_sync(dapm);
+ snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1,
+ RT5682_PWR_VREF2 | RT5682_PWR_MB, 0);
+ snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
+ RT5682_PWR_CBJ, 0);
rt5682->jack_type = 0;
}
struct snd_soc_component *component =
snd_soc_dapm_to_component(w->dapm);
struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
- int ref, val, reg, sft, mask, idx = -EINVAL;
+ int ref, val, reg, idx = -EINVAL;
static const int div_f[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48};
static const int div_o[] = {1, 2, 4, 6, 8, 12, 16, 24, 32, 48};
idx = rt5682_div_sel(rt5682, ref, div_f, ARRAY_SIZE(div_f));
- if (w->shift == RT5682_PWR_ADC_S1F_BIT) {
+ if (w->shift == RT5682_PWR_ADC_S1F_BIT)
reg = RT5682_PLL_TRACK_3;
- sft = RT5682_ADC_OSR_SFT;
- mask = RT5682_ADC_OSR_MASK;
- } else {
+ else
reg = RT5682_PLL_TRACK_2;
- sft = RT5682_DAC_OSR_SFT;
- mask = RT5682_DAC_OSR_MASK;
- }
snd_soc_component_update_bits(component, reg,
RT5682_FILTER_CLK_DIV_MASK, idx << RT5682_FILTER_CLK_DIV_SFT);
}
snd_soc_component_update_bits(component, RT5682_ADDA_CLK_1,
- mask, idx << sft);
+ RT5682_ADC_OSR_MASK | RT5682_DAC_OSR_MASK,
+ (idx << RT5682_ADC_OSR_SFT) | (idx << RT5682_DAC_OSR_SFT));
return 0;
}
0, NULL, 0),
SND_SOC_DAPM_SUPPLY("Vref1", RT5682_PWR_ANLG_1, RT5682_PWR_VREF1_BIT, 0,
rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
- SND_SOC_DAPM_SUPPLY("Vref2", RT5682_PWR_ANLG_1, RT5682_PWR_VREF2_BIT, 0,
- rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
/* ASRC */
SND_SOC_DAPM_SUPPLY_S("DAC STO1 ASRC", 1, RT5682_PLL_TRACK_1,
SND_SOC_DAPM_PGA("BST1 CBJ", SND_SOC_NOPM,
0, 0, NULL, 0),
- SND_SOC_DAPM_SUPPLY("CBJ Power", RT5682_PWR_ANLG_3,
- RT5682_PWR_CBJ_BIT, 0, NULL, 0),
-
/* REC Mixer */
SND_SOC_DAPM_MIXER("RECMIX1L", SND_SOC_NOPM, 0, 0, rt5682_rec1_l_mix,
ARRAY_SIZE(rt5682_rec1_l_mix)),
/*Vref*/
{"MICBIAS1", NULL, "Vref1"},
- {"MICBIAS1", NULL, "Vref2"},
{"MICBIAS2", NULL, "Vref1"},
- {"MICBIAS2", NULL, "Vref2"},
{"CLKDET SYS", NULL, "CLKDET"},
{"IN1P", NULL, "LDO2"},
{"BST1 CBJ", NULL, "IN1P"},
- {"BST1 CBJ", NULL, "CBJ Power"},
- {"CBJ Power", NULL, "Vref2"},
{"RECMIX1L", "CBJ Switch", "BST1 CBJ"},
{"RECMIX1L", NULL, "RECMIX1L Power"},
{"HP Amp", NULL, "Capless"},
{"HP Amp", NULL, "Charge Pump"},
{"HP Amp", NULL, "CLKDET SYS"},
- {"HP Amp", NULL, "CBJ Power"},
{"HP Amp", NULL, "Vref1"},
- {"HP Amp", NULL, "Vref2"},
{"HPOL Playback", "Switch", "HP Amp"},
{"HPOR Playback", "Switch", "HP Amp"},
{"HPOL", NULL, "HPOL Playback"},
switch (level) {
case SND_SOC_BIAS_PREPARE:
regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
- RT5682_PWR_MB | RT5682_PWR_BG,
- RT5682_PWR_MB | RT5682_PWR_BG);
+ RT5682_PWR_BG, RT5682_PWR_BG);
regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1,
RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO,
RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO);
break;
case SND_SOC_BIAS_STANDBY:
- regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
- RT5682_PWR_MB, RT5682_PWR_MB);
regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1,
RT5682_DIG_GATE_CTRL, RT5682_DIG_GATE_CTRL);
break;
regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1,
RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, 0);
regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
- RT5682_PWR_MB | RT5682_PWR_BG, 0);
+ RT5682_PWR_BG, 0);
break;
default:
regcache_cache_only(rt5682->regmap, false);
regcache_sync(rt5682->regmap);
+ rt5682_irq(0, rt5682);
+
return 0;
}
#else
*
* Copyright 2011 NW Digital Radio
*
- * Author: Jeremy McDermond <nh6z@nh6z.net>
+ * Author: Annaliese McDermond <nh6z@nh6z.net>
*
* Based on sound/soc/codecs/wm8974 and TI driver for kernel 2.6.27.
*
module_i2c_driver(aic32x4_i2c_driver);
MODULE_DESCRIPTION("ASoC TLV320AIC32x4 codec driver I2C");
-MODULE_AUTHOR("Jeremy McDermond <nh6z@nh6z.net>");
+MODULE_AUTHOR("Annaliese McDermond <nh6z@nh6z.net>");
MODULE_LICENSE("GPL");
*
* Copyright 2011 NW Digital Radio
*
- * Author: Jeremy McDermond <nh6z@nh6z.net>
+ * Author: Annaliese McDermond <nh6z@nh6z.net>
*
* Based on sound/soc/codecs/wm8974 and TI driver for kernel 2.6.27.
*
module_spi_driver(aic32x4_spi_driver);
MODULE_DESCRIPTION("ASoC TLV320AIC32x4 codec driver SPI");
-MODULE_AUTHOR("Jeremy McDermond <nh6z@nh6z.net>");
+MODULE_AUTHOR("Annaliese McDermond <nh6z@nh6z.net>");
MODULE_LICENSE("GPL");
SND_SOC_DAPM_INPUT("IN2_R"),
SND_SOC_DAPM_INPUT("IN3_L"),
SND_SOC_DAPM_INPUT("IN3_R"),
+ SND_SOC_DAPM_INPUT("CM_L"),
+ SND_SOC_DAPM_INPUT("CM_R"),
};
static const struct snd_soc_dapm_route aic32x4_dapm_routes[] = {
struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component);
int ret, i;
- INIT_LIST_HEAD(&aic3x->list);
aic3x->component = component;
for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) {
if (ret != 0)
goto err_gpio;
+ INIT_LIST_HEAD(&aic3x->list);
list_add(&aic3x->list, &reset_list);
return 0;
{
struct aic3x_priv *aic3x = i2c_get_clientdata(client);
+ list_del(&aic3x->list);
+
if (gpio_is_valid(aic3x->gpio_reset) &&
!aic3x_is_shared_reset(aic3x)) {
gpio_set_value(aic3x->gpio_reset, 0);
if (wm_adsp_fw[dsp->fw].num_caps != 0)
wm_adsp_buffer_free(dsp);
+ dsp->fatal_error = false;
+
mutex_unlock(&dsp->pwr_lock);
adsp_dbg(dsp, "Execution stopped\n");
{
struct wm_adsp_compr_buf *buf = NULL, *tmp;
+ if (compr->dsp->fatal_error)
+ return -EINVAL;
+
list_for_each_entry(tmp, &compr->dsp->buffer_list, list) {
if (!tmp->name || !strcmp(compr->name, tmp->name)) {
buf = tmp;
ret = wm_adsp_buffer_read(buf, HOST_BUFFER_FIELD(error), &buf->error);
if (ret < 0) {
- adsp_err(buf->dsp, "Failed to check buffer error: %d\n", ret);
+ compr_err(buf, "Failed to check buffer error: %d\n", ret);
return ret;
}
if (buf->error != 0) {
- adsp_err(buf->dsp, "Buffer error occurred: %d\n", buf->error);
+ compr_err(buf, "Buffer error occurred: %d\n", buf->error);
return -EIO;
}
if (ret < 0)
break;
- wm_adsp_buffer_clear(compr->buf);
-
/* Trigger the IRQ at one fragment of data */
ret = wm_adsp_buffer_write(compr->buf,
HOST_BUFFER_FIELD(high_water_mark),
}
break;
case SNDRV_PCM_TRIGGER_STOP:
+ if (wm_adsp_compr_attached(compr))
+ wm_adsp_buffer_clear(compr->buf);
break;
default:
ret = -EINVAL;
}
EXPORT_SYMBOL_GPL(wm_adsp2_lock);
+static void wm_adsp_fatal_error(struct wm_adsp *dsp)
+{
+ struct wm_adsp_compr *compr;
+
+ dsp->fatal_error = true;
+
+ list_for_each_entry(compr, &dsp->compr_list, list) {
+ if (compr->stream) {
+ snd_compr_stop_error(compr->stream,
+ SNDRV_PCM_STATE_XRUN);
+ snd_compr_fragment_elapsed(compr->stream);
+ }
+ }
+}
+
irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp)
{
unsigned int val;
struct regmap *regmap = dsp->regmap;
int ret = 0;
+ mutex_lock(&dsp->pwr_lock);
+
ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val);
if (ret) {
adsp_err(dsp,
"Failed to read Region Lock Ctrl register: %d\n", ret);
- return IRQ_HANDLED;
+ goto error;
}
if (val & ADSP2_WDT_TIMEOUT_STS_MASK) {
adsp_err(dsp, "watchdog timeout error\n");
wm_adsp_stop_watchdog(dsp);
+ wm_adsp_fatal_error(dsp);
}
if (val & (ADSP2_SLAVE_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) {
adsp_err(dsp,
"Failed to read Bus Err Addr register: %d\n",
ret);
- return IRQ_HANDLED;
+ goto error;
}
adsp_err(dsp, "bus error address = 0x%x\n",
adsp_err(dsp,
"Failed to read Pmem Xmem Err Addr register: %d\n",
ret);
- return IRQ_HANDLED;
+ goto error;
}
adsp_err(dsp, "xmem error address = 0x%x\n",
regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL,
ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT);
+error:
+ mutex_unlock(&dsp->pwr_lock);
+
return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(wm_adsp2_bus_error);
bool preloaded;
bool booted;
bool running;
+ bool fatal_error;
struct list_head ctl_list;
}
EXPORT_SYMBOL_GPL(fsl_asrc_get_dma_channel);
+static int fsl_asrc_dai_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct fsl_asrc *asrc_priv = snd_soc_dai_get_drvdata(dai);
+
+ /* Odd channel number is not valid for older ASRC (channel_bits==3) */
+ if (asrc_priv->channel_bits == 3)
+ snd_pcm_hw_constraint_step(substream->runtime, 0,
+ SNDRV_PCM_HW_PARAM_CHANNELS, 2);
+
+ return 0;
+}
+
static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params,
struct snd_soc_dai *dai)
}
static const struct snd_soc_dai_ops fsl_asrc_dai_ops = {
+ .startup = fsl_asrc_dai_startup,
.hw_params = fsl_asrc_dai_hw_params,
.hw_free = fsl_asrc_dai_hw_free,
.trigger = fsl_asrc_dai_trigger,
u32 fifo_depth;
u32 slot_width;
u32 slots;
+ u32 tx_mask;
+ u32 rx_mask;
u32 hck_rate[2];
u32 sck_rate[2];
bool hck_dir[2];
regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR,
ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots));
- regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMA,
- ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(tx_mask));
- regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMB,
- ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(tx_mask));
-
regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR,
ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots));
- regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMA,
- ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(rx_mask));
- regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMB,
- ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(rx_mask));
-
esai_priv->slot_width = slot_width;
esai_priv->slots = slots;
+ esai_priv->tx_mask = tx_mask;
+ esai_priv->rx_mask = rx_mask;
return 0;
}
bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
u8 i, channels = substream->runtime->channels;
u32 pins = DIV_ROUND_UP(channels, esai_priv->slots);
+ u32 mask;
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
for (i = 0; tx && i < channels; i++)
regmap_write(esai_priv->regmap, REG_ESAI_ETDR, 0x0);
+ /*
+ * When set the TE/RE in the end of enablement flow, there
+ * will be channel swap issue for multi data line case.
+ * In order to workaround this issue, we switch the bit
+ * enablement sequence to below sequence
+ * 1) clear the xSMB & xSMA: which is done in probe and
+ * stop state.
+ * 2) set TE/RE
+ * 3) set xSMB
+ * 4) set xSMA: xSMA is the last one in this flow, which
+ * will trigger esai to start.
+ */
regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK,
tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins));
+ mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask;
+
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
+ ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask));
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
+ ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask));
+
break;
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, 0);
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
+ ESAI_xSMA_xS_MASK, 0);
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
+ ESAI_xSMB_xS_MASK, 0);
/* Disable and reset FIFO */
regmap_update_bits(esai_priv->regmap, REG_ESAI_xFCR(tx),
return ret;
}
+ esai_priv->tx_mask = 0xFFFFFFFF;
+ esai_priv->rx_mask = 0xFFFFFFFF;
+
+ /* Clear the TSMA, TSMB, RSMA, RSMB */
+ regmap_write(esai_priv->regmap, REG_ESAI_TSMA, 0);
+ regmap_write(esai_priv->regmap, REG_ESAI_TSMB, 0);
+ regmap_write(esai_priv->regmap, REG_ESAI_RSMA, 0);
+ regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0);
+
ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component,
&fsl_esai_dai, 1);
if (ret) {
#include <linux/string.h>
#include <sound/simple_card_utils.h>
+#define DPCM_SELECTABLE 1
+
struct graph_priv {
struct snd_soc_card snd_card;
struct graph_dai_props {
struct device_node *codec_port;
struct device_node *codec_port_old = NULL;
struct asoc_simple_card_data adata;
+ uintptr_t dpcm_selectable = (uintptr_t)of_device_get_match_data(dev);
int rc, ret;
/* loop for all listed CPU port */
* if Codec port has many endpoints,
* or has convert-xxx property
*/
- if ((of_get_child_count(codec_port) > 1) ||
- adata.convert_rate || adata.convert_channels)
+ if (dpcm_selectable &&
+ ((of_get_child_count(codec_port) > 1) ||
+ adata.convert_rate || adata.convert_channels))
ret = func_dpcm(priv, cpu_ep, codec_ep, li,
(codec_port_old == codec_port));
/* else normal sound */
static const struct of_device_id graph_of_match[] = {
{ .compatible = "audio-graph-card", },
- { .compatible = "audio-graph-scu-card", },
+ { .compatible = "audio-graph-scu-card",
+ .data = (void *)DPCM_SELECTABLE },
{},
};
MODULE_DEVICE_TABLE(of, graph_of_match);
#include <linux/device.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/string.h>
#include <sound/simple_card.h>
#include <sound/soc-dai.h>
#include <sound/soc.h>
+#define DPCM_SELECTABLE 1
+
struct simple_priv {
struct snd_soc_card snd_card;
struct simple_dai_props {
struct device *dev = simple_priv_to_dev(priv);
struct device_node *top = dev->of_node;
struct device_node *node;
+ uintptr_t dpcm_selectable = (uintptr_t)of_device_get_match_data(dev);
bool is_top = 0;
int ret = 0;
* if it has many CPUs,
* or has convert-xxx property
*/
- if (num > 2 ||
- adata.convert_rate || adata.convert_channels)
+ if (dpcm_selectable &&
+ (num > 2 ||
+ adata.convert_rate || adata.convert_channels))
ret = func_dpcm(priv, np, codec, li, is_top);
/* else normal sound */
else
static const struct of_device_id simple_of_match[] = {
{ .compatible = "simple-audio-card", },
- { .compatible = "simple-scu-audio-card", },
+ { .compatible = "simple-scu-audio-card",
+ .data = (void *)DPCM_SELECTABLE },
{},
};
MODULE_DEVICE_TABLE(of, simple_of_match);
return sst_dsp_init_v2_dpcm(component);
}
+static void sst_soc_remove(struct snd_soc_component *component)
+{
+ struct sst_data *drv = dev_get_drvdata(component->dev);
+
+ drv->soc_card = NULL;
+}
+
static const struct snd_soc_component_driver sst_soc_platform_drv = {
.name = DRV_NAME,
.probe = sst_soc_probe,
+ .remove = sst_soc_remove,
.ops = &sst_platform_ops,
.compr_ops = &sst_platform_compr_ops,
.pcm_new = sst_pcm_new,
struct clk *mclk;
struct snd_soc_jack jack;
bool ts3a227e_present;
+ int quirks;
};
static int platform_clock_control(struct snd_soc_dapm_widget *w,
struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card);
int ret;
+ /* See the comment in snd_cht_mc_probe() */
+ if (ctx->quirks & QUIRK_PMC_PLT_CLK_0)
+ return 0;
+
codec_dai = snd_soc_card_get_codec_dai(card, CHT_CODEC_DAI);
if (!codec_dai) {
dev_err(card->dev, "Codec dai not found; Unable to set platform clock\n");
"jack detection gpios not added, error %d\n", ret);
}
+ /* See the comment in snd_cht_mc_probe() */
+ if (ctx->quirks & QUIRK_PMC_PLT_CLK_0)
+ return 0;
+
/*
* The firmware might enable the clock at
* boot (this information may or may not
const char *mclk_name;
struct snd_soc_acpi_mach *mach;
const char *platform_name;
- int quirks = 0;
-
- dmi_id = dmi_first_match(cht_max98090_quirk_table);
- if (dmi_id)
- quirks = (unsigned long)dmi_id->driver_data;
drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
if (!drv)
return -ENOMEM;
+ dmi_id = dmi_first_match(cht_max98090_quirk_table);
+ if (dmi_id)
+ drv->quirks = (unsigned long)dmi_id->driver_data;
+
drv->ts3a227e_present = acpi_dev_found("104C227E");
if (!drv->ts3a227e_present) {
/* no need probe TI jack detection chip */
snd_soc_card_cht.dev = &pdev->dev;
snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
- if (quirks & QUIRK_PMC_PLT_CLK_0)
+ if (drv->quirks & QUIRK_PMC_PLT_CLK_0)
mclk_name = "pmc_plt_clk_0";
else
mclk_name = "pmc_plt_clk_3";
return PTR_ERR(drv->mclk);
}
+ /*
+ * Boards which have the MAX98090's clk connected to clk_0 do not seem
+ * to like it if we muck with the clock. If we disable the clock when
+ * it is unused we get "max98090 i2c-193C9890:00: PLL unlocked" errors
+ * and the PLL never seems to lock again.
+ * So for these boards we enable it here once and leave it at that.
+ */
+ if (drv->quirks & QUIRK_PMC_PLT_CLK_0) {
+ ret_val = clk_prepare_enable(drv->mclk);
+ if (ret_val < 0) {
+ dev_err(&pdev->dev, "MCLK enable error: %d\n", ret_val);
+ return ret_val;
+ }
+ }
+
ret_val = devm_snd_soc_register_card(&pdev->dev, &snd_soc_card_cht);
if (ret_val) {
dev_err(&pdev->dev,
return ret_val;
}
+static int snd_cht_mc_remove(struct platform_device *pdev)
+{
+ struct snd_soc_card *card = platform_get_drvdata(pdev);
+ struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card);
+
+ if (ctx->quirks & QUIRK_PMC_PLT_CLK_0)
+ clk_disable_unprepare(ctx->mclk);
+
+ return 0;
+}
+
static struct platform_driver snd_cht_mc_driver = {
.driver = {
.name = "cht-bsw-max98090",
},
.probe = snd_cht_mc_probe,
+ .remove = snd_cht_mc_remove,
};
module_platform_driver(snd_cht_mc_driver)
};
static const unsigned int dmic_2ch[] = {
- 4,
+ 2,
};
static const struct snd_pcm_hw_constraint_list constraints_dmic_2ch = {
base_cfg->audio_fmt.bit_depth = format->bit_depth;
base_cfg->audio_fmt.valid_bit_depth = format->valid_bit_depth;
base_cfg->audio_fmt.ch_cfg = format->ch_cfg;
+ base_cfg->audio_fmt.sample_type = format->sample_type;
dev_dbg(ctx->dev, "bit_depth=%x valid_bd=%x ch_config=%x\n",
format->bit_depth, format->valid_bit_depth,
struct hdac_stream *hstream;
struct hdac_ext_stream *stream;
struct hdac_ext_link *link;
+ unsigned char stream_tag;
hstream = snd_hdac_get_stream(bus, params->stream,
params->link_dma_id + 1);
snd_hdac_ext_link_stream_setup(stream, format_val);
- list_for_each_entry(link, &bus->hlink_list, list) {
- if (link->index == params->link_index)
- snd_hdac_ext_link_set_stream_id(link,
- hstream->stream_tag);
+ stream_tag = hstream->stream_tag;
+ if (stream->hstream.direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ list_for_each_entry(link, &bus->hlink_list, list) {
+ if (link->index == params->link_index)
+ snd_hdac_ext_link_set_stream_id(link,
+ stream_tag);
+ }
}
stream->link_prepared = 1;
struct hdac_ext_stream *link_dev =
snd_soc_dai_get_dma_data(dai, substream);
struct hdac_ext_link *link;
+ unsigned char stream_tag;
dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
if (!link)
return -EINVAL;
- snd_hdac_ext_link_clear_stream_id(link, hdac_stream(link_dev)->stream_tag);
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ stream_tag = hdac_stream(link_dev)->stream_tag;
+ snd_hdac_ext_link_clear_stream_id(link, stream_tag);
+ }
+
snd_hdac_ext_stream_release(link_dev, HDAC_EXT_STREAM_TYPE_LINK);
return 0;
}
return 0;
}
+static void skl_pcm_remove(struct snd_soc_component *component)
+{
+ /* remove topology */
+ snd_soc_tplg_component_remove(component, SND_SOC_TPLG_INDEX_ALL);
+}
+
static const struct snd_soc_component_driver skl_component = {
.name = "pcm",
.probe = skl_platform_soc_probe,
+ .remove = skl_pcm_remove,
.ops = &skl_platform_ops,
.pcm_new = skl_pcm_new,
.pcm_free = skl_pcm_free,
- .ignore_module_refcount = 1, /* do not increase the refcount in core */
+ .module_get_upon_open = 1, /* increment refcount when a pcm is opened */
};
int skl_platform_register(struct device *dev)
BT_SCO_STATE_IDLE,
BT_SCO_STATE_RUNNING,
BT_SCO_STATE_ENDING,
+ BT_SCO_STATE_LOOPBACK,
};
enum bt_sco_direct {
if (bt->rx->state != BT_SCO_STATE_RUNNING &&
bt->rx->state != BT_SCO_STATE_ENDING &&
bt->tx->state != BT_SCO_STATE_RUNNING &&
- bt->tx->state != BT_SCO_STATE_ENDING) {
+ bt->tx->state != BT_SCO_STATE_ENDING &&
+ bt->tx->state != BT_SCO_STATE_LOOPBACK) {
dev_warn(bt->dev, "%s(), in idle state: rx->state: %d, tx->state: %d\n",
__func__, bt->rx->state, bt->tx->state);
goto irq_handler_exit;
buf_cnt_tx = btsco_packet_info[packet_type][2];
buf_cnt_rx = btsco_packet_info[packet_type][3];
+ if (bt->tx->state == BT_SCO_STATE_LOOPBACK) {
+ u8 *src, *dst;
+ unsigned long connsys_addr_rx, ap_addr_rx;
+ unsigned long connsys_addr_tx, ap_addr_tx;
+
+ connsys_addr_rx = *bt->bt_reg_pkt_r;
+ ap_addr_rx = (unsigned long)bt->bt_sram_bank2_base +
+ (connsys_addr_rx & 0xFFFF);
+
+ connsys_addr_tx = *bt->bt_reg_pkt_w;
+ ap_addr_tx = (unsigned long)bt->bt_sram_bank2_base +
+ (connsys_addr_tx & 0xFFFF);
+
+ if (connsys_addr_tx == 0xdeadfeed ||
+ connsys_addr_rx == 0xdeadfeed) {
+ /* bt return 0xdeadfeed if read reg during bt sleep */
+ dev_warn(bt->dev, "%s(), connsys_addr_tx == 0xdeadfeed\n",
+ __func__);
+ goto irq_handler_exit;
+ }
+
+ src = (u8 *)ap_addr_rx;
+ dst = (u8 *)ap_addr_tx;
+
+ mtk_btcvsd_snd_data_transfer(BT_SCO_DIRECT_BT2ARM, src,
+ bt->tx->temp_packet_buf,
+ packet_length,
+ packet_num);
+ mtk_btcvsd_snd_data_transfer(BT_SCO_DIRECT_ARM2BT,
+ bt->tx->temp_packet_buf, dst,
+ packet_length,
+ packet_num);
+ bt->rx->rw_cnt++;
+ bt->tx->rw_cnt++;
+ }
+
if (bt->rx->state == BT_SCO_STATE_RUNNING ||
bt->rx->state == BT_SCO_STATE_ENDING) {
if (bt->rx->xrun) {
return 0;
}
+static int btcvsd_loopback_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+ struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(cmpnt);
+ bool lpbk_en = bt->tx->state == BT_SCO_STATE_LOOPBACK;
+
+ ucontrol->value.integer.value[0] = lpbk_en;
+ return 0;
+}
+
+static int btcvsd_loopback_set(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+ struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(cmpnt);
+
+ if (ucontrol->value.integer.value[0]) {
+ mtk_btcvsd_snd_set_state(bt, bt->tx, BT_SCO_STATE_LOOPBACK);
+ mtk_btcvsd_snd_set_state(bt, bt->rx, BT_SCO_STATE_LOOPBACK);
+ } else {
+ mtk_btcvsd_snd_set_state(bt, bt->tx, BT_SCO_STATE_RUNNING);
+ mtk_btcvsd_snd_set_state(bt, bt->rx, BT_SCO_STATE_RUNNING);
+ }
+ return 0;
+}
+
static int btcvsd_tx_mute_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
static const struct snd_kcontrol_new mtk_btcvsd_snd_controls[] = {
SOC_ENUM_EXT("BTCVSD Band", btcvsd_enum[0],
btcvsd_band_get, btcvsd_band_set),
+ SOC_SINGLE_BOOL_EXT("BTCVSD Loopback Switch", 0,
+ btcvsd_loopback_get, btcvsd_loopback_set),
SOC_SINGLE_BOOL_EXT("BTCVSD Tx Mute Switch", 0,
btcvsd_tx_mute_get, btcvsd_tx_mute_set),
SOC_SINGLE_BOOL_EXT("BTCVSD Tx Irq Received Switch", 0,
int m_sel_id = mck_div[mck_id].m_sel_id;
int div_clk_id = mck_div[mck_id].div_clk_id;
+ /* i2s5 mck not support */
+ if (mck_id == MT8183_I2S5_MCK)
+ return;
+
clk_disable_unprepare(afe_priv->clk[div_clk_id]);
if (m_sel_id >= 0)
clk_disable_unprepare(afe_priv->clk[m_sel_id]);
#include "rockchip_pdm.h"
-#define PDM_DMA_BURST_SIZE (16) /* size * width: 16*4 = 64 bytes */
+#define PDM_DMA_BURST_SIZE (8) /* size * width: 8*4 = 32 bytes */
struct rk_pdm_dev {
struct device *dev;
return -EINVAL;
}
+ pm_runtime_get_sync(cpu_dai->dev);
regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, mask, val);
+ pm_runtime_put(cpu_dai->dev);
return 0;
}
};
static const struct snd_soc_dapm_route samsung_i2s_dapm_routes[] = {
- { "Playback Mixer", NULL, "Primary" },
- { "Playback Mixer", NULL, "Secondary" },
+ { "Playback Mixer", NULL, "Primary Playback" },
+ { "Playback Mixer", NULL, "Secondary Playback" },
{ "Mixer DAI TX", NULL, "Playback Mixer" },
- { "Playback Mixer", NULL, "Mixer DAI RX" },
+ { "Primary Capture", NULL, "Mixer DAI RX" },
};
static const struct snd_soc_component_driver samsung_i2s_component = {
int num_dais)
{
static const char *dai_names[] = { "samsung-i2s", "samsung-i2s-sec" };
- static const char *stream_names[] = { "Primary", "Secondary" };
+ static const char *stream_names[] = { "Primary Playback",
+ "Secondary Playback" };
struct snd_soc_dai_driver *dai_drv;
struct i2s_dai *dai;
int i;
dai_drv->capture.channels_max = 2;
dai_drv->capture.rates = i2s_dai_data->pcm_rates;
dai_drv->capture.formats = SAMSUNG_I2S_FMTS;
+ dai_drv->capture.stream_name = "Primary Capture";
return 0;
}
return ret;
/*
- * We add 1 to the rclk_freq value in order to avoid too low clock
+ * We add 2 to the rclk_freq value in order to avoid too low clock
* frequency values due to the EPLL output frequency not being exact
* multiple of the audio sampling rate.
*/
- rclk_freq = params_rate(params) * rfs + 1;
+ rclk_freq = params_rate(params) * rfs + 2;
ret = clk_set_rate(priv->sclk_i2s, rclk_freq);
if (ret < 0)
{ .compatible = "renesas,rcar_sound-gen1", .data = (void *)RSND_GEN1 },
{ .compatible = "renesas,rcar_sound-gen2", .data = (void *)RSND_GEN2 },
{ .compatible = "renesas,rcar_sound-gen3", .data = (void *)RSND_GEN3 },
+ /* Special Handling */
+ { .compatible = "renesas,rcar_sound-r8a77990", .data = (void *)(RSND_GEN3 | RSND_SOC_E) },
{},
};
MODULE_DEVICE_TABLE(of, rsnd_of_match);
#define RSND_GEN1 (1 << 0)
#define RSND_GEN2 (2 << 0)
#define RSND_GEN3 (3 << 0)
+#define RSND_SOC_MASK (0xFF << 4)
+#define RSND_SOC_E (1 << 4) /* E1/E2/E3 */
/*
* below value will be filled on rsnd_gen_probe()
#define rsnd_is_gen1(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN1)
#define rsnd_is_gen2(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN2)
#define rsnd_is_gen3(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN3)
+#define rsnd_is_e3(priv) (((priv)->flags & \
+ (RSND_GEN_MASK | RSND_SOC_MASK)) == \
+ (RSND_GEN3 | RSND_SOC_E))
#define rsnd_flags_has(p, f) ((p)->flags & (f))
#define rsnd_flags_set(p, f) ((p)->flags |= (f))
*/
#include "rsnd.h"
-#include <linux/sys_soc.h>
#define SRC_NAME "src"
return rate;
}
-const static u32 bsdsr_table_pattern1[] = {
+static const u32 bsdsr_table_pattern1[] = {
0x01800000, /* 6 - 1/6 */
0x01000000, /* 6 - 1/4 */
0x00c00000, /* 6 - 1/3 */
0x00400000, /* 6 - 1 */
};
-const static u32 bsdsr_table_pattern2[] = {
+static const u32 bsdsr_table_pattern2[] = {
0x02400000, /* 6 - 1/6 */
0x01800000, /* 6 - 1/4 */
0x01200000, /* 6 - 1/3 */
0x00600000, /* 6 - 1 */
};
-const static u32 bsisr_table[] = {
+static const u32 bsisr_table[] = {
0x00100060, /* 6 - 1/6 */
0x00100040, /* 6 - 1/4 */
0x00100030, /* 6 - 1/3 */
0x00100020, /* 6 - 1 */
};
-const static u32 chan288888[] = {
+static const u32 chan288888[] = {
0x00000006, /* 1 to 2 */
0x000001fe, /* 1 to 8 */
0x000001fe, /* 1 to 8 */
0x000001fe, /* 1 to 8 */
};
-const static u32 chan244888[] = {
+static const u32 chan244888[] = {
0x00000006, /* 1 to 2 */
0x0000001e, /* 1 to 4 */
0x0000001e, /* 1 to 4 */
0x000001fe, /* 1 to 8 */
};
-const static u32 chan222222[] = {
+static const u32 chan222222[] = {
0x00000006, /* 1 to 2 */
0x00000006, /* 1 to 2 */
0x00000006, /* 1 to 2 */
0x00000006, /* 1 to 2 */
};
-static const struct soc_device_attribute ov_soc[] = {
- { .soc_id = "r8a77990" }, /* E3 */
- { /* sentinel */ }
-};
-
static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
struct rsnd_mod *mod)
{
struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
struct device *dev = rsnd_priv_to_dev(priv);
struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
- const struct soc_device_attribute *soc = soc_device_match(ov_soc);
int is_play = rsnd_io_is_play(io);
int use_src = 0;
u32 fin, fout;
/*
* E3 need to overwrite
*/
- if (soc)
+ if (rsnd_is_e3(priv))
switch (rsnd_mod_id(mod)) {
case 0:
case 4:
snd_soc_dapm_free(snd_soc_component_get_dapm(component));
soc_cleanup_component_debugfs(component);
component->card = NULL;
- if (!component->driver->ignore_module_refcount)
+ if (!component->driver->module_get_upon_open)
module_put(component->dev->driver->owner);
}
return 0;
}
- if (!component->driver->ignore_module_refcount &&
+ if (!component->driver->module_get_upon_open &&
!try_module_get(component->dev->driver->owner))
return -ENODEV;
ret = soc_init_dai_link(card, link);
if (ret) {
+ soc_cleanup_platform(card);
dev_err(card->dev, "ASoC: failed to init link %s\n",
link->name);
mutex_unlock(&client_mutex);
card->instantiated = 0;
mutex_init(&card->mutex);
mutex_init(&card->dapm_mutex);
+ spin_lock_init(&card->dpcm_lock);
return snd_soc_bind_card(card);
}
case snd_soc_dapm_dac:
case snd_soc_dapm_aif_in:
case snd_soc_dapm_pga:
+ case snd_soc_dapm_buffer:
+ case snd_soc_dapm_scheduler:
+ case snd_soc_dapm_effect:
+ case snd_soc_dapm_src:
+ case snd_soc_dapm_asrc:
+ case snd_soc_dapm_encoder:
+ case snd_soc_dapm_decoder:
case snd_soc_dapm_out_drv:
case snd_soc_dapm_micbias:
case snd_soc_dapm_line:
int count;
devm_kfree(card->dev, (void *)*private_value);
+
+ if (!w_param_text)
+ return;
+
for (count = 0 ; count < num_params; count++)
devm_kfree(card->dev, (void *)w_param_text[count]);
devm_kfree(card->dev, w_param_text);
#include <linux/delay.h>
#include <linux/pinctrl/consumer.h>
#include <linux/pm_runtime.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/export.h>
continue;
component->driver->ops->close(substream);
+
+ if (component->driver->module_get_upon_open)
+ module_put(component->dev->driver->owner);
}
return 0;
!component->driver->ops->open)
continue;
+ if (component->driver->module_get_upon_open &&
+ !try_module_get(component->dev->driver->owner)) {
+ ret = -ENODEV;
+ goto module_err;
+ }
+
ret = component->driver->ops->open(substream);
if (ret < 0) {
dev_err(component->dev,
component_err:
soc_pcm_components_close(substream, component);
-
+module_err:
if (cpu_dai->driver->ops->shutdown)
cpu_dai->driver->ops->shutdown(substream, cpu_dai);
out:
codec_params = *params;
/* fixup params based on TDM slot masks */
- if (codec_dai->tx_mask)
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+ codec_dai->tx_mask)
soc_pcm_codec_params_fixup(&codec_params,
codec_dai->tx_mask);
- if (codec_dai->rx_mask)
+
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE &&
+ codec_dai->rx_mask)
soc_pcm_codec_params_fixup(&codec_params,
codec_dai->rx_mask);
struct snd_soc_pcm_runtime *be, int stream)
{
struct snd_soc_dpcm *dpcm;
+ unsigned long flags;
/* only add new dpcms */
for_each_dpcm_be(fe, stream, dpcm) {
dpcm->fe = fe;
be->dpcm[stream].runtime = fe->dpcm[stream].runtime;
dpcm->state = SND_SOC_DPCM_LINK_STATE_NEW;
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
list_add(&dpcm->list_be, &fe->dpcm[stream].be_clients);
list_add(&dpcm->list_fe, &be->dpcm[stream].fe_clients);
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
dev_dbg(fe->dev, "connected new DPCM %s path %s %s %s\n",
stream ? "capture" : "playback", fe->dai_link->name,
void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm, *d;
+ unsigned long flags;
for_each_dpcm_be_safe(fe, stream, dpcm, d) {
dev_dbg(fe->dev, "ASoC: BE %s disconnect check for %s\n",
#ifdef CONFIG_DEBUG_FS
debugfs_remove(dpcm->debugfs_state);
#endif
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
list_del(&dpcm->list_be);
list_del(&dpcm->list_fe);
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
kfree(dpcm);
}
}
void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
+ unsigned long flags;
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
for_each_dpcm_be(fe, stream, dpcm)
dpcm->be->dpcm[stream].runtime_update =
SND_SOC_DPCM_UPDATE_NO;
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
}
static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe,
struct snd_soc_pcm_runtime *be = dpcm->be;
struct snd_pcm_substream *be_substream =
snd_soc_dpcm_get_substream(be, stream);
- struct snd_soc_pcm_runtime *rtd = be_substream->private_data;
+ struct snd_soc_pcm_runtime *rtd;
struct snd_soc_dai *codec_dai;
int i;
+ /* A backend may not have the requested substream */
+ if (!be_substream)
+ continue;
+
+ rtd = be_substream->private_data;
if (rtd->dai_link->be_hw_params_fixup)
continue;
struct snd_soc_dpcm *dpcm;
enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream];
int ret;
+ unsigned long flags;
dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n",
stream ? "capture" : "playback", fe->dai_link->name);
dpcm_be_dai_shutdown(fe, stream);
disconnect:
/* disconnect any non started BEs */
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
for_each_dpcm_be(fe, stream, dpcm) {
struct snd_soc_pcm_runtime *be = dpcm->be;
if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START)
dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
}
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
return ret;
}
{
struct snd_soc_dpcm *dpcm;
int state;
+ int ret = 1;
+ unsigned long flags;
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
for_each_dpcm_fe(be, stream, dpcm) {
if (dpcm->fe == fe)
state = dpcm->fe->dpcm[stream].state;
if (state == SND_SOC_DPCM_STATE_START ||
state == SND_SOC_DPCM_STATE_PAUSED ||
- state == SND_SOC_DPCM_STATE_SUSPEND)
- return 0;
+ state == SND_SOC_DPCM_STATE_SUSPEND) {
+ ret = 0;
+ break;
+ }
}
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
/* it's safe to free/stop this BE DAI */
- return 1;
+ return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_free_stop);
{
struct snd_soc_dpcm *dpcm;
int state;
+ int ret = 1;
+ unsigned long flags;
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
for_each_dpcm_fe(be, stream, dpcm) {
if (dpcm->fe == fe)
if (state == SND_SOC_DPCM_STATE_START ||
state == SND_SOC_DPCM_STATE_PAUSED ||
state == SND_SOC_DPCM_STATE_SUSPEND ||
- state == SND_SOC_DPCM_STATE_PREPARE)
- return 0;
+ state == SND_SOC_DPCM_STATE_PREPARE) {
+ ret = 0;
+ break;
+ }
}
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
/* it's safe to change hw_params */
- return 1;
+ return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_params);
struct snd_pcm_hw_params *params = &fe->dpcm[stream].hw_params;
struct snd_soc_dpcm *dpcm;
ssize_t offset = 0;
+ unsigned long flags;
/* FE state */
offset += snprintf(buf + offset, size - offset,
goto out;
}
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
for_each_dpcm_be(fe, stream, dpcm) {
struct snd_soc_pcm_runtime *be = dpcm->be;
params = &dpcm->hw_params;
params_channels(params),
params_rate(params));
}
-
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
out:
return offset;
}
snd_ctl_remove(card, kcontrol);
- kfree(dobj->control.dvalues);
+ /* free enum kcontrol's dvalues and dtexts */
+ kfree(se->dobj.control.dvalues);
for (j = 0; j < se->items; j++)
- kfree(dobj->control.dtexts[j]);
- kfree(dobj->control.dtexts);
+ kfree(se->dobj.control.dtexts[j]);
+ kfree(se->dobj.control.dtexts);
kfree(se);
kfree(w->kcontrol_news[i].name);
#include <linux/clk.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
/* PCM buffer */
unsigned char *pcm_buff;
unsigned int pos;
+
+ struct mutex lock; /* protect against race condition on iio state */
};
static const struct snd_pcm_hardware stm32_adfsdm_pcm_hw = {
{
struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai);
+ mutex_lock(&priv->lock);
if (priv->iio_active) {
iio_channel_stop_all_cb(priv->iio_cb);
priv->iio_active = false;
}
+ mutex_unlock(&priv->lock);
}
static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream,
struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai);
int ret;
+ mutex_lock(&priv->lock);
+ if (priv->iio_active) {
+ iio_channel_stop_all_cb(priv->iio_cb);
+ priv->iio_active = false;
+ }
+
ret = iio_write_channel_attribute(priv->iio_ch,
substream->runtime->rate, 0,
IIO_CHAN_INFO_SAMP_FREQ);
if (ret < 0) {
dev_err(dai->dev, "%s: Failed to set %d sampling rate\n",
__func__, substream->runtime->rate);
- return ret;
+ goto out;
}
if (!priv->iio_active) {
__func__, ret);
}
+out:
+ mutex_unlock(&priv->lock);
+
return ret;
}
static int stm32_adfsdm_probe(struct platform_device *pdev)
{
struct stm32_adfsdm_priv *priv;
+ struct snd_soc_component *component;
int ret;
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
priv->dev = &pdev->dev;
priv->dai_drv = stm32_adfsdm_dai;
+ mutex_init(&priv->lock);
dev_set_drvdata(&pdev->dev, priv);
if (IS_ERR(priv->iio_cb))
return PTR_ERR(priv->iio_cb);
- ret = devm_snd_soc_register_component(&pdev->dev,
- &stm32_adfsdm_soc_platform,
- NULL, 0);
+ component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL);
+ if (!component)
+ return -ENOMEM;
+#ifdef CONFIG_DEBUG_FS
+ component->debugfs_prefix = "pcm";
+#endif
+
+ ret = snd_soc_add_component(&pdev->dev, component,
+ &stm32_adfsdm_soc_platform, NULL, 0);
if (ret < 0)
dev_err(&pdev->dev, "%s: Failed to register PCM platform\n",
__func__);
return ret;
}
+/*
+ * Platform driver .remove callback: calls snd_soc_unregister_component()
+ * for this device. Always returns 0.
+ */
+static int stm32_adfsdm_remove(struct platform_device *pdev)
+{
+ snd_soc_unregister_component(&pdev->dev);
+
+ return 0;
+}
+
static struct platform_driver stm32_adfsdm_driver = {
.driver = {
.name = STM32_ADFSDM_DRV_NAME,
.of_match_table = stm32_adfsdm_of_match,
},
.probe = stm32_adfsdm_probe,
+ .remove = stm32_adfsdm_remove,
};
module_platform_driver(stm32_adfsdm_driver);
case STM32_I2S_CFG2_REG:
case STM32_I2S_IER_REG:
case STM32_I2S_SR_REG:
- case STM32_I2S_TXDR_REG:
case STM32_I2S_RXDR_REG:
case STM32_I2S_CGFR_REG:
return true;
static bool stm32_i2s_volatile_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
- case STM32_I2S_TXDR_REG:
+ case STM32_I2S_SR_REG:
case STM32_I2S_RXDR_REG:
return true;
default:
if (!pdev) {
dev_err(&sai_client->pdev->dev,
"Device not found for node %pOFn\n", np_provider);
+ of_node_put(np_provider);
return -ENODEV;
}
dev_err(&sai_client->pdev->dev,
"SAI sync provider data not found\n");
ret = -EINVAL;
- goto out_put_dev;
+ goto error;
}
/* Configure sync client */
ret = stm32_sai_sync_conf_client(sai_client, synci);
if (ret < 0)
- goto out_put_dev;
+ goto error;
/* Configure sync provider */
ret = stm32_sai_sync_conf_provider(sai_provider, synco);
-out_put_dev:
+error:
put_device(&pdev->dev);
+ of_node_put(np_provider);
return ret;
}
#define SAI_IEC60958_STATUS_BYTES 24
#define SAI_MCLK_NAME_LEN 32
+#define SAI_RATE_11K 11025
/**
* struct stm32_sai_sub_data - private data of SAI sub block (block A or B)
* @slot_mask: rx or tx active slots mask. set at init or at runtime
* @data_size: PCM data width. corresponds to PCM substream width.
* @spdif_frm_cnt: S/PDIF playback frame counter
- * @snd_aes_iec958: iec958 data
+ * @iec958: iec958 data
* @ctrl_lock: control lock
+ * @irq_lock: prevent race condition with IRQ
*/
struct stm32_sai_sub_data {
struct platform_device *pdev;
unsigned int spdif_frm_cnt;
struct snd_aes_iec958 iec958;
struct mutex ctrl_lock; /* protect resources accessed by controls */
+ spinlock_t irq_lock; /* used to prevent race condition with IRQ */
};
enum stm32_sai_fifo_th {
return ret;
}
+/*
+ * Select the parent of sai_ck from the requested rate: clk_x11k when @rate
+ * is a multiple of SAI_RATE_11K, clk_x8k otherwise.
+ *
+ * Returns the clk_set_parent() result. A non-zero result is also logged,
+ * with an extra hint appended when it is -EBUSY.
+ */
+static int stm32_sai_set_parent_clock(struct stm32_sai_sub_data *sai,
+ unsigned int rate)
+{
+ struct platform_device *pdev = sai->pdev;
+ struct clk *parent_clk = sai->pdata->clk_x8k;
+ int ret;
+
+ if (!(rate % SAI_RATE_11K))
+ parent_clk = sai->pdata->clk_x11k;
+
+ ret = clk_set_parent(sai->sai_ck, parent_clk);
+ if (ret)
+ dev_err(&pdev->dev, " Error %d setting sai_ck parent clock. %s",
+ ret, ret == -EBUSY ?
+ "Active stream rates conflict\n" : "\n");
+
+ return ret;
+}
+
static long stm32_sai_mclk_round_rate(struct clk_hw *hw, unsigned long rate,
unsigned long *prate)
{
status = SNDRV_PCM_STATE_XRUN;
}
- if (status != SNDRV_PCM_STATE_RUNNING)
+ spin_lock(&sai->irq_lock);
+ if (status != SNDRV_PCM_STATE_RUNNING && sai->substream)
snd_pcm_stop_xrun(sai->substream);
+ spin_unlock(&sai->irq_lock);
return IRQ_HANDLED;
}
struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai);
int ret;
- if (dir == SND_SOC_CLOCK_OUT) {
+ if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) {
ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX,
SAI_XCR1_NODIV,
(unsigned int)~SAI_XCR1_NODIV);
if (ret < 0)
return ret;
- dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq);
- sai->mclk_rate = freq;
+ /* If master clock is used, set parent clock now */
+ ret = stm32_sai_set_parent_clock(sai, freq);
+ if (ret)
+ return ret;
- if (sai->sai_mclk) {
- ret = clk_set_rate_exclusive(sai->sai_mclk,
- sai->mclk_rate);
- if (ret) {
- dev_err(cpu_dai->dev,
- "Could not set mclk rate\n");
- return ret;
- }
+ ret = clk_set_rate_exclusive(sai->sai_mclk, freq);
+ if (ret) {
+ dev_err(cpu_dai->dev,
+ ret == -EBUSY ?
+ "Active streams have incompatible rates" :
+ "Could not set mclk rate\n");
+ return ret;
}
+
+ dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq);
+ sai->mclk_rate = freq;
}
return 0;
{
struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai);
int imr, cr2, ret;
+ unsigned long flags;
+ spin_lock_irqsave(&sai->irq_lock, flags);
sai->substream = substream;
+ spin_unlock_irqrestore(&sai->irq_lock, flags);
+
+ if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) {
+ snd_pcm_hw_constraint_mask64(substream->runtime,
+ SNDRV_PCM_HW_PARAM_FORMAT,
+ SNDRV_PCM_FMTBIT_S32_LE);
+ snd_pcm_hw_constraint_single(substream->runtime,
+ SNDRV_PCM_HW_PARAM_CHANNELS, 2);
+ }
ret = clk_prepare_enable(sai->sai_ck);
if (ret < 0) {
struct snd_pcm_hw_params *params)
{
struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai);
- int div = 0;
+ int div = 0, cr1 = 0;
int sai_clk_rate, mclk_ratio, den;
unsigned int rate = params_rate(params);
+ int ret;
- if (!(rate % 11025))
- clk_set_parent(sai->sai_ck, sai->pdata->clk_x11k);
- else
- clk_set_parent(sai->sai_ck, sai->pdata->clk_x8k);
+ if (!sai->sai_mclk) {
+ ret = stm32_sai_set_parent_clock(sai, rate);
+ if (ret)
+ return ret;
+ }
sai_clk_rate = clk_get_rate(sai->sai_ck);
if (STM_SAI_IS_F4(sai->pdata)) {
} else {
if (sai->mclk_rate) {
mclk_ratio = sai->mclk_rate / rate;
- if ((mclk_ratio != 512) &&
- (mclk_ratio != 256)) {
+ if (mclk_ratio == 512) {
+ cr1 = SAI_XCR1_OSR;
+ } else if (mclk_ratio != 256) {
dev_err(cpu_dai->dev,
"Wrong mclk ratio %d\n",
mclk_ratio);
return -EINVAL;
}
+
+ regmap_update_bits(sai->regmap,
+ STM_SAI_CR1_REGX,
+ SAI_XCR1_OSR, cr1);
+
div = stm32_sai_get_clk_div(sai, sai_clk_rate,
sai->mclk_rate);
if (div < 0)
struct snd_soc_dai *cpu_dai)
{
struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai);
+ unsigned long flags;
regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0);
regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV,
SAI_XCR1_NODIV);
- clk_disable_unprepare(sai->sai_ck);
+ /* Release mclk rate only if rate was actually set */
+ if (sai->mclk_rate) {
+ clk_rate_exclusive_put(sai->sai_mclk);
+ sai->mclk_rate = 0;
+ }
- clk_rate_exclusive_put(sai->sai_mclk);
+ clk_disable_unprepare(sai->sai_ck);
+ spin_lock_irqsave(&sai->irq_lock, flags);
sai->substream = NULL;
+ spin_unlock_irqrestore(&sai->irq_lock, flags);
}
static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd,
struct snd_soc_dai *cpu_dai)
{
struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev);
+ struct snd_kcontrol_new knew = iec958_ctls;
if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) {
dev_dbg(&sai->pdev->dev, "%s: register iec controls", __func__);
- return snd_ctl_add(rtd->pcm->card,
- snd_ctl_new1(&iec958_ctls, sai));
+ knew.device = rtd->pcm->device;
+ return snd_ctl_add(rtd->pcm->card, snd_ctl_new1(&knew, sai));
}
return 0;
static int stm32_sai_dai_probe(struct snd_soc_dai *cpu_dai)
{
struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev);
- int cr1 = 0, cr1_mask;
+ int cr1 = 0, cr1_mask, ret;
sai->cpu_dai = cpu_dai;
/* Configure synchronization */
if (sai->sync == SAI_SYNC_EXTERNAL) {
/* Configure synchro client and provider */
- sai->pdata->set_sync(sai->pdata, sai->np_sync_provider,
- sai->synco, sai->synci);
+ ret = sai->pdata->set_sync(sai->pdata, sai->np_sync_provider,
+ sai->synco, sai->synci);
+ if (ret)
+ return ret;
}
cr1_mask |= SAI_XCR1_SYNCEN_MASK;
if (!sai->cpu_dai_drv)
return -ENOMEM;
- sai->cpu_dai_drv->name = dev_name(&pdev->dev);
if (STM_SAI_IS_PLAYBACK(sai)) {
memcpy(sai->cpu_dai_drv, &stm32_sai_playback_dai,
sizeof(stm32_sai_playback_dai));
sizeof(stm32_sai_capture_dai));
sai->cpu_dai_drv->capture.stream_name = sai->cpu_dai_drv->name;
}
+ sai->cpu_dai_drv->name = dev_name(&pdev->dev);
return 0;
}
sai->pdev = pdev;
mutex_init(&sai->ctrl_lock);
+ spin_lock_init(&sai->irq_lock);
platform_set_drvdata(pdev, sai);
sai->pdata = dev_get_drvdata(pdev->dev.parent);
{
int i;
- stream->buffer = alloc_pages_exact(stream->buffer_sz, GFP_KERNEL);
+ stream->buffer = alloc_pages_exact(buffer_sz, GFP_KERNEL);
if (!stream->buffer)
return -ENOMEM;
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")
+/*
+ * Kernel uses dmb variants on arm64 for smp_*() barriers. Pretty much the same
+ * implementation as above mb()/wmb()/rmb(), though for the latter kernel uses
+ * dsb. In any case, should above mb()/wmb()/rmb() change, make sure the below
+ * smp_*() don't.
+ */
+#define smp_mb() asm volatile("dmb ish" ::: "memory")
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
+#define smp_rmb() asm volatile("dmb ishld" ::: "memory")
+
#define smp_store_release(p, v) \
do { \
union { typeof(*p) __val; char __c[1]; } __u = \
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#elif defined(__x86_64__)
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
+#define mb() asm volatile("mfence" ::: "memory")
+#define rmb() asm volatile("lfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
#endif
#if defined(__x86_64__)
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **sendmsg4** | **sendmsg6** }
+| **sendmsg4** | **sendmsg6** | **sysctl** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
**sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
unconnected udp4 socket (since 4.18);
**sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
- unconnected udp6 socket (since 4.18).
+ unconnected udp6 socket (since 4.18);
+ **sysctl** sysctl access (since 5.2).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
-h, --help
Print short generic help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
-h, --help
Print short generic help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
-h, --help
Print short generic help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
-h, --help
Print short generic help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
-h, --help
Print short generic help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
+| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
+| **cgroup/sysctl**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
Output will start with program ID followed by program type and
zero or more named attributes (depending on kernel version).
+ Since Linux 5.1 the kernel can collect statistics on BPF
+ programs (such as the total time spent running the program,
+ and the number of times it was run). If available, bpftool
+ shows such statistics. However, the kernel does not collect
+ them by default, as it slightly impacts performance on each
+ program run. Activation or deactivation of the feature is
+ performed via the **kernel.bpf_stats_enabled** sysctl knob.
+
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
Dump eBPF instructions of the program from the kernel. By
default, eBPF will be disassembled and printed to standard
-h, --help
Print short generic help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
-h, --help
Print short help message (similar to **bpftool help**).
- -v, --version
+ -V, --version
Print version number (similar to **bpftool version**).
-j, --json
lirc_mode2 cgroup/bind4 cgroup/bind6 \
cgroup/connect4 cgroup/connect6 \
cgroup/sendmsg4 cgroup/sendmsg6 \
- cgroup/post_bind4 cgroup/post_bind6" -- \
+ cgroup/post_bind4 cgroup/post_bind6 \
+ cgroup/sysctl" -- \
"$cur" ) )
return 0
;;
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
device bind4 bind6 post_bind4 post_bind6 connect4 \
- connect6 sendmsg4 sendmsg6'
+ connect6 sendmsg4 sendmsg6 sysctl'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag'
case $prev in
;;
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
post_bind4|post_bind6|connect4|connect6|sendmsg4|\
- sendmsg6)
+ sendmsg6|sysctl)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
return ret;
}
+/*
+ * Dump a BTF_KIND_VAR as a JSON object whose single member is named after
+ * the variable and holds the value rendered through the variable's type.
+ *
+ * Returns -EINVAL when type_id cannot be resolved, otherwise the result of
+ * btf_dumper_do_type() on the referenced type.
+ */
+static int btf_dumper_var(const struct btf_dumper *d, __u32 type_id,
+			  __u8 bit_offset, const void *data)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+	int ret;
+
+	/* Same guard as btf_dumper_datasec(): never dereference a failed lookup. */
+	if (!t)
+		return -EINVAL;
+
+	jsonw_start_object(d->jw);
+	jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off));
+	ret = btf_dumper_do_type(d, t->type, bit_offset, data);
+	jsonw_end_object(d->jw);
+
+	return ret;
+}
+
+/*
+ * Dump a BTF_KIND_DATASEC as a JSON object: the section name maps to an
+ * array with one entry per btf_var_secinfo record, each read from
+ * data + the record's offset. Dumping stops at the first failing entry.
+ *
+ * Returns -EINVAL when type_id cannot be resolved, otherwise 0 or the
+ * first non-zero result from btf_dumper_do_type().
+ */
+static int btf_dumper_datasec(const struct btf_dumper *d, __u32 type_id,
+ const void *data)
+{
+ struct btf_var_secinfo *vsi;
+ const struct btf_type *t;
+ int ret = 0, i, vlen;
+
+ t = btf__type_by_id(d->btf, type_id);
+ if (!t)
+ return -EINVAL;
+
+ vlen = BTF_INFO_VLEN(t->info);
+ /* the secinfo records are read from directly behind the btf_type */
+ vsi = (struct btf_var_secinfo *)(t + 1);
+
+ jsonw_start_object(d->jw);
+ jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off));
+ jsonw_start_array(d->jw);
+ for (i = 0; i < vlen; i++) {
+ ret = btf_dumper_do_type(d, vsi[i].type, 0, data + vsi[i].offset);
+ if (ret)
+ break;
+ }
+ /* array and object are closed even when an entry failed */
+ jsonw_end_array(d->jw);
+ jsonw_end_object(d->jw);
+
+ return ret;
+}
+
static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
__u8 bit_offset, const void *data)
{
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
return btf_dumper_modifier(d, type_id, bit_offset, data);
+ case BTF_KIND_VAR:
+ return btf_dumper_var(d, type_id, bit_offset, data);
+ case BTF_KIND_DATASEC:
+ return btf_dumper_datasec(d, type_id, data);
default:
jsonw_printf(d->jw, "(unsupported-kind");
return -EINVAL;
{
const struct btf_type *proto_type;
const struct btf_array *array;
+ const struct btf_var *var;
const struct btf_type *t;
if (!type_id) {
if (pos == -1)
return -1;
break;
+ case BTF_KIND_VAR:
+ var = (struct btf_var *)(t + 1);
+ if (var->linkage == BTF_VAR_STATIC)
+ BTF_PRINT_ARG("static ");
+ BTF_PRINT_TYPE(t->type);
+ BTF_PRINT_ARG(" %s",
+ btf__name_by_offset(btf, t->name_off));
+ break;
+ case BTF_KIND_DATASEC:
+ BTF_PRINT_ARG("section (\"%s\") ",
+ btf__name_by_offset(btf, t->name_off));
+ break;
case BTF_KIND_UNKN:
default:
return -1;
" ATTACH_TYPE := { ingress | egress | sock_create |\n" \
" sock_ops | device | bind4 | bind6 |\n" \
" post_bind4 | post_bind6 | connect4 |\n" \
- " connect6 | sendmsg4 | sendmsg6 }"
+ " connect6 | sendmsg4 | sendmsg6 | sysctl }"
static const char * const attach_type_strings[] = {
[BPF_CGROUP_INET_INGRESS] = "ingress",
[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+ [BPF_CGROUP_SYSCTL] = "sysctl",
[__MAX_BPF_ATTACH_TYPE] = NULL,
};
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
show_attached_bpf_progs(cgroup_fd, type, ftw->level);
+ if (errno == EINVAL)
+ /* Last attach type does not support query.
+ * Do not report an error for this, especially because batch
+ * mode would stop processing commands.
+ */
+ errno = 0;
+
if (json_output) {
jsonw_end_array(json_wtr);
jsonw_end_object(json_wtr);
[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
};
extern const char * const map_type_name[];
/* start of key-value pair */
jsonw_start_object(d->jw);
- jsonw_name(d->jw, "key");
+ if (map_info->btf_key_type_id) {
+ jsonw_name(d->jw, "key");
- ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
- if (ret)
- goto err_end_obj;
+ ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+ if (ret)
+ goto err_end_obj;
+ }
if (!map_is_per_cpu(map_info->type)) {
jsonw_name(d->jw, "value");
}
static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
- const char *value)
+ const char *error_msg)
{
- int value_size = strlen(value);
+ int msg_size = strlen(error_msg);
bool single_line, break_names;
- break_names = info->key_size > 16 || value_size > 16;
- single_line = info->key_size + value_size <= 24 && !break_names;
+ break_names = info->key_size > 16 || msg_size > 16;
+ single_line = info->key_size + msg_size <= 24 && !break_names;
printf("key:%c", break_names ? '\n' : ' ');
fprint_hex(stdout, key, info->key_size, " ");
printf(single_line ? " " : "\n");
- printf("value:%c%s", break_names ? '\n' : ' ', value);
+ printf("value:%c%s", break_names ? '\n' : ' ', error_msg);
printf("\n");
}
if (info->value_size) {
printf("value:%c", break_names ? '\n' : ' ');
- if (value)
- fprint_hex(stdout, value, info->value_size,
- " ");
- else
- printf("<no entry>");
+ fprint_hex(stdout, value, info->value_size, " ");
}
printf("\n");
for (i = 0; i < n; i++) {
printf("value (CPU %02d):%c",
i, info->value_size > 16 ? '\n' : ' ');
- if (value)
- fprint_hex(stdout, value + i * step,
- info->value_size, " ");
- else
- printf("<no entry>");
+ fprint_hex(stdout, value + i * step,
+ info->value_size, " ");
printf("\n");
}
}
}
close(fd);
+ if (info->btf_id)
+ jsonw_int_field(json_wtr, "btf_id", info->btf_id);
+
if (!hash_empty(map_table.table)) {
struct pinned_obj *obj;
}
close(fd);
- printf("\n");
if (!hash_empty(map_table.table)) {
struct pinned_obj *obj;
hash_for_each_possible(map_table.table, obj, hash, info->id) {
if (obj->id == info->id)
- printf("\tpinned %s\n", obj->path);
+ printf("\n\tpinned %s", obj->path);
}
}
+
+ if (info->btf_id)
+ printf("\n\tbtf_id %d", info->btf_id);
+
+ printf("\n");
return 0;
}
jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
jsonw_end_object(json_wtr);
} else {
+ const char *msg = NULL;
+
if (errno == ENOENT)
- print_entry_plain(map_info, key, NULL);
- else
- print_entry_error(map_info, key,
- strerror(lookup_errno));
+ msg = "<no entry>";
+ else if (lookup_errno == ENOSPC &&
+ map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+ msg = "<cannot read>";
+
+ print_entry_error(map_info, key,
+ msg ? : strerror(lookup_errno));
}
return 0;
}
}
+ if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
+ info.value_size != 8)
+ p_info("Warning: cannot read values from %s map with value_size != 8",
+ map_type_name[info.type]);
while (true) {
err = bpf_map_get_next_key(fd, prev_key, key);
if (err) {
if (info->nr_map_ids)
show_prog_maps(fd, info->nr_map_ids);
+ if (info->btf_id)
+ jsonw_int_field(json_wtr, "btf_id", info->btf_id);
+
if (!hash_empty(prog_table.table)) {
struct pinned_obj *obj;
}
}
+ if (info->btf_id)
+ printf("\n\tbtf_id %d", info->btf_id);
+
printf("\n");
}
" tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n"
" cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n"
" lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n"
- " sk_reuseport | flow_dissector |\n"
+ " sk_reuseport | flow_dissector | cgroup/sysctl |\n"
" cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
" cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
" cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
if (insn->src_reg == BPF_PSEUDO_MAP_FD)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u]", insn->imm);
+ else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);
.off = 0, \
.imm = ((__u64) (IMM)) >> 32 })
+/*
+ * Emit both halves of a BPF_LD | BPF_DW | BPF_IMM instruction pair with
+ * every field spelled out: DST, SRC, OFF1 and IMM1 go into the first insn,
+ * OFF2 and IMM2 into the second, whose code and registers are zero.
+ */
+#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF1, \
+ .imm = IMM1 }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF2, \
+ .imm = IMM2 })
+
/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
- BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \
+ MAP_FD, 0)
+
+/* pseudo BPF_LD_IMM64 insn pair: MAP_FD in insn[0].imm, VALUE_OFF in insn[1].imm */
+#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \
+ MAP_FD, VALUE_OFF)
/* Relative call */
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
};
enum bpf_map_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_CGROUP_SYSCTL,
};
enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_SYSCTL,
__MAX_BPF_ATTACH_TYPE
};
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
#define BPF_OBJ_NAME_LEN 16U
-/* Flags for accessing BPF object */
+/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len)**:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
+ *
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* Return
* 0 if iph and th are a valid SYN cookie ACK, or a negative error
* otherwise.
+ *
+ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * Description
+ * Get name of sysctl in /proc/sys/ and copy it into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get current value of sysctl as it is presented in /proc/sys
+ * (incl. newline, etc), and copy it as a string into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * The whole value is copied, no matter what file position user
+ * space issued e.g. sys_read at.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated value in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ *
+ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get new value being written by user space to sysctl (before
+ * the actual write happens) and copy it as a string into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * User space may write new value at file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated value in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * Description
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ * *buf* should contain a string in same form as provided by user
+ * space on sysctl write.
+ *
+ * User space may write new value at file position > 0. To override
+ * the whole sysctl value file position should be set to zero.
+ * Return
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by isspace(3)) followed by a single optional '-'
+ * sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space strtol(3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than buf_len.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by isspace(3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space strtoul(3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than buf_len.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(skb_ecn_set_ce), \
FN(get_listener_sock), \
FN(skc_lookup_tcp), \
- FN(tcp_check_syncookie),
+ FN(tcp_check_syncookie), \
+ FN(sysctl_get_name), \
+ FN(sysctl_get_current_value), \
+ FN(sysctl_get_new_value), \
+ FN(sysctl_set_new_value), \
+ FN(strtol), \
+ FN(strtoul),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+/*
+ * Encode the inner MAC header length into bits 56-63 of the
+ * BPF_FUNC_skb_adjust_room flags. The argument is parenthesized so that a
+ * compound expression (e.g. a ternary or a bitwise OR) is cast and masked
+ * as a whole instead of being split by operator precedence.
+ */
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)(len) & \
+					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
+/* BPF_FUNC_sysctl_get_name flags. */
+#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
struct bpf_spin_lock {
__u32 val;
};
+
+/* Context passed as *ctx* to the bpf_sysctl_*() helpers. */
+struct bpf_sysctl {
+ __u32 write; /* Sysctl is being read (= 0) or written (= 1).
+ * Allows 1,2,4-byte read, but no write.
+ */
+ __u32 file_pos; /* Sysctl file position to read from, write to.
+ * Allows 1,2,4-byte read and 4-byte write.
+ */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
* struct, union and fwd
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- * FUNC and FUNC_PROTO.
+ * FUNC, FUNC_PROTO and VAR.
* "type" is a type_id referring to another type.
*/
union {
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
-#define BTF_KIND_MAX 13
-#define NR_BTF_KINDS 14
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
__u32 type;
};
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with their
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
#ifndef __KERNEL__
#include <stdlib.h>
+#include <time.h>
#endif
/*
#include "liburing.h"
#include "barrier.h"
-#ifndef IOCQE_FLAG_CACHEHIT
-#define IOCQE_FLAG_CACHEHIT (1U << 0)
-#endif
-
#define min(a, b) ((a < b) ? (a) : (b))
struct io_sq_ring {
unsigned long reaps;
unsigned long done;
unsigned long calls;
- unsigned long cachehit, cachemiss;
volatile int finish;
__s32 *fds;
return -1;
}
}
- if (cqe->flags & IOCQE_FLAG_CACHEHIT)
- s->cachehit++;
- else
- s->cachemiss++;
reaped++;
head++;
} while (1);
int main(int argc, char *argv[])
{
struct submitter *s = &submitters[0];
- unsigned long done, calls, reap, cache_hit, cache_miss;
+ unsigned long done, calls, reap;
int err, i, flags, fd;
char *fdepths;
void *ret;
pthread_create(&s->thread, NULL, submitter_fn, s);
fdepths = malloc(8 * s->nr_files);
- cache_hit = cache_miss = reap = calls = done = 0;
+ reap = calls = done = 0;
do {
unsigned long this_done = 0;
unsigned long this_reap = 0;
unsigned long this_call = 0;
- unsigned long this_cache_hit = 0;
- unsigned long this_cache_miss = 0;
unsigned long rpc = 0, ipc = 0;
- double hit = 0.0;
sleep(1);
this_done += s->done;
this_call += s->calls;
this_reap += s->reaps;
- this_cache_hit += s->cachehit;
- this_cache_miss += s->cachemiss;
- if (this_cache_hit && this_cache_miss) {
- unsigned long hits, total;
-
- hits = this_cache_hit - cache_hit;
- total = hits + this_cache_miss - cache_miss;
- hit = (double) hits / (double) total;
- hit *= 100.0;
- }
if (this_call - calls) {
rpc = (this_done - done) / (this_call - calls);
ipc = (this_reap - reap) / (this_call - calls);
} else
rpc = ipc = -1;
file_depths(fdepths);
- printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n",
+ printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
this_done - done, rpc, ipc, s->inflight,
- fdepths, hit);
+ fdepths);
done = this_done;
calls = this_call;
reap = this_reap;
- cache_hit = s->cachehit;
- cache_miss = s->cachemiss;
} while (!finish);
pthread_join(s->thread, &ret);
libbpf_version.h
+libbpf.pc
FEATURE-DUMP.libbpf
test_libbpf
BPF_VERSION = 0
BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 2
+BPF_EXTRAVERSION = 3
MAKEFLAGS += --no-print-directory
LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION)
LIB_FILE = libbpf.a libbpf.so*
+PC_FILE = libbpf.pc
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE))
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
-CMD_TARGETS = $(LIB_TARGET)
+CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
CXX_TEST_TARGET = $(OUTPUT)test_libbpf
$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
$(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@
+# Generate the pkg-config file from its template. The template is listed
+# as a prerequisite so that editing it regenerates the output.
+$(OUTPUT)libbpf.pc: libbpf.pc.template
+	$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+		-e "s|@LIBDIR@|$(libdir_SQ)|" \
+		-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
+		< libbpf.pc.template > $@
+
check: check_abi
check_abi: $(OUTPUT)libbpf.so
$(call do_install,btf.h,$(prefix)/include/bpf,644); \
$(call do_install,xsk.h,$(prefix)/include/bpf,644);
-install: install_lib
+# Install the generated pkg-config file into $(libdir_SQ)/pkgconfig
+# with mode 644.
+install_pkgconfig: $(PC_FILE)
+	$(call QUIET_INSTALL, $(PC_FILE)) \
+		$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
+
+install: install_lib install_pkgconfig
### Cleaning rules
clean:
$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
- *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS
+ *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
- __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
attr.value_size = create_attr->value_size;
attr.max_entries = create_attr->max_entries;
attr.map_flags = create_attr->map_flags;
- memcpy(attr.map_name, create_attr->name,
- min(name_len, BPF_OBJ_NAME_LEN - 1));
+ if (create_attr->name)
+ memcpy(attr.map_name, create_attr->name,
+ min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
attr.numa_node = create_attr->numa_node;
attr.btf_fd = create_attr->btf_fd;
attr.btf_key_type_id = create_attr->btf_key_type_id;
int key_size, int inner_map_fd, int max_entries,
__u32 map_flags, int node)
{
- __u32 name_len = name ? strlen(name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
attr.inner_map_fd = inner_map_fd;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
- memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+ if (name)
+ memcpy(attr.map_name, name,
+ min(strlen(name), BPF_OBJ_NAME_LEN - 1));
if (node >= 0) {
attr.map_flags |= BPF_F_NUMA_NODE;
void *finfo = NULL, *linfo = NULL;
union bpf_attr attr;
__u32 log_level;
- __u32 name_len;
int fd;
if (!load_attr || !log_buf != !log_buf_sz)
return -EINVAL;
log_level = load_attr->log_level;
- if (log_level > 2 || (log_level && !log_buf))
+ if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
return -EINVAL;
- name_len = load_attr->name ? strlen(load_attr->name) : 0;
-
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
attr.line_info_rec_size = load_attr->line_info_rec_size;
attr.line_info_cnt = load_attr->line_info_cnt;
attr.line_info = ptr_to_u64(load_attr->line_info);
- memcpy(attr.prog_name, load_attr->name,
- min(name_len, BPF_OBJ_NAME_LEN - 1));
+ if (load_attr->name)
+ memcpy(attr.prog_name, load_attr->name,
+ min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
fd = sys_bpf_prog_load(&attr, sizeof(attr));
if (fd >= 0)
return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}
+/*
+ * Issue the BPF_MAP_FREEZE command for the map referred to by @fd.
+ * The return value is whatever sys_bpf() returns.
+ */
+int bpf_map_freeze(int fd)
+{
+	union bpf_attr freeze_attr;
+
+	/* Clear the whole union so every field not set below is zero. */
+	memset(&freeze_attr, 0, sizeof(freeze_attr));
+	freeze_attr.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_FREEZE, &freeze_attr, sizeof(freeze_attr));
+}
+
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr;
attr.test.data_out = ptr_to_u64(test_attr->data_out);
attr.test.data_size_in = test_attr->data_size_in;
attr.test.data_size_out = test_attr->data_size_out;
+ attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+ attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+ attr.test.ctx_size_in = test_attr->ctx_size_in;
+ attr.test.ctx_size_out = test_attr->ctx_size_out;
attr.test.repeat = test_attr->repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
test_attr->data_size_out = attr.test.data_size_out;
+ test_attr->ctx_size_out = attr.test.ctx_size_out;
test_attr->retval = attr.test.retval;
test_attr->duration = attr.test.duration;
return ret;
#define MAPS_RELAX_COMPAT 0x01
/* Recommend log buffer size */
-#define BPF_LOG_BUF_SIZE (256 * 1024)
+#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
LIBBPF_API int
bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz);
void *value);
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_map_freeze(int fd);
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
* out: length of data_out */
__u32 retval; /* out: return code of the BPF program */
__u32 duration; /* out: average per repetition in ns */
+ const void *ctx_in; /* optional */
+ __u32 ctx_size_in;
+ void *ctx_out; /* optional */
+ __u32 ctx_size_out; /* in: max length of ctx_out
+ * out: length of ctx_out */
};
LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
((k) == BTF_KIND_CONST) || \
((k) == BTF_KIND_RESTRICT))
+#define IS_VAR(k) ((k) == BTF_KIND_VAR)
+
static struct btf_type btf_void;
struct btf {
return base_size + vlen * sizeof(struct btf_member);
case BTF_KIND_FUNC_PROTO:
return base_size + vlen * sizeof(struct btf_param);
+ case BTF_KIND_VAR:
+ return base_size + sizeof(struct btf_var);
+ case BTF_KIND_DATASEC:
+ return base_size + vlen * sizeof(struct btf_var_secinfo);
default:
pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info));
return -EINVAL;
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_DATASEC:
size = t->size;
goto done;
case BTF_KIND_PTR:
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_VAR:
type_id = t->type;
break;
case BTF_KIND_ARRAY:
t = btf__type_by_id(btf, type_id);
while (depth < MAX_RESOLVE_DEPTH &&
!btf_type_is_void_or_null(t) &&
- IS_MODIFIER(BTF_INFO_KIND(t->info))) {
+ (IS_MODIFIER(BTF_INFO_KIND(t->info)) ||
+ IS_VAR(BTF_INFO_KIND(t->info)))) {
type_id = t->type;
t = btf__type_by_id(btf, type_id);
depth++;
return btf;
}
+/*
+ * qsort() comparator ordering "struct btf_var_secinfo" entries by
+ * ascending in-section offset.
+ *
+ * The offsets are unsigned 32-bit values, so their difference must not be
+ * returned directly: it wraps and, once converted to int, has the wrong
+ * sign whenever the two offsets are more than INT_MAX apart.
+ *
+ * Returns -1, 0 or 1 when @_a sorts before, equal to or after @_b.
+ */
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+	const struct btf_var_secinfo *a = _a;
+	const struct btf_var_secinfo *b = _b;
+
+	if (a->offset < b->offset)
+		return -1;
+	return a->offset > b->offset;
+}
+
+/*
+ * Fix up one BTF_KIND_DATASEC type @t with information from the ELF object:
+ * the section size comes from bpf_object__section_size() and the offset of
+ * every non-static variable from bpf_object__variable_offset(). Afterwards
+ * the btf_var_secinfo entries following @t are sorted by offset.
+ *
+ * Returns 0 on success, -ENOENT if a name is missing, the section size is
+ * unavailable or disagrees with a non-zero t->size, or a variable offset
+ * cannot be found, and -EINVAL if an entry does not refer to a VAR type.
+ */
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+			     struct btf_type *t)
+{
+	__u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info);
+	const char *sec_name = btf__name_by_offset(btf, t->name_off);
+	const struct btf_type *t_var;
+	struct btf_var_secinfo *vsi;
+	const struct btf_var *var;
+	const char *var_name;
+	int ret;
+
+	if (!sec_name) {
+		pr_debug("No name found in string section for DATASEC kind.\n");
+		return -ENOENT;
+	}
+
+	ret = bpf_object__section_size(obj, sec_name, &size);
+	if (ret || !size || (t->size && t->size != size)) {
+		pr_debug("Invalid size for section %s: %u bytes\n", sec_name, size);
+		return -ENOENT;
+	}
+
+	t->size = size;
+
+	for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1);
+	     i < vars; i++, vsi++) {
+		t_var = btf__type_by_id(btf, vsi->type);
+		/* Validate the lookup before touching the data that follows
+		 * the type. The NULL test is defensive: an unknown type id
+		 * presumably yields NULL (confirm against btf__type_by_id()).
+		 */
+		if (!t_var || BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) {
+			pr_debug("Non-VAR type seen in section %s\n", sec_name);
+			return -EINVAL;
+		}
+
+		/* Static variables keep the offset already present in BTF. */
+		var = (const struct btf_var *)(t_var + 1);
+		if (var->linkage == BTF_VAR_STATIC)
+			continue;
+
+		/* Kept separate from sec_name so that messages about the
+		 * section keep printing the section's name.
+		 */
+		var_name = btf__name_by_offset(btf, t_var->name_off);
+		if (!var_name) {
+			pr_debug("No name found in string section for VAR kind\n");
+			return -ENOENT;
+		}
+
+		ret = bpf_object__variable_offset(obj, var_name, &off);
+		if (ret) {
+			pr_debug("No offset found in symbol table for VAR %s\n", var_name);
+			return -ENOENT;
+		}
+
+		vsi->offset = off;
+	}
+
+	qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
+	return 0;
+}
+
+/*
+ * Walk every type in @btf and fix up each BTF_KIND_DATASEC entry with
+ * data from @obj. Stops at the first failure and returns its error code;
+ * returns 0 when all entries were processed.
+ */
+int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+{
+	__u32 id;
+
+	for (id = 1; id <= btf->nr_types; id++) {
+		struct btf_type *type = btf->types[id];
+		int rc;
+
+		/* The compiler cannot know section sizes or global variable
+		 * offsets while emitting BTF, so the loader patches them in
+		 * from the ELF file itself. Only DATASEC types need this.
+		 */
+		if (BTF_INFO_KIND(type->info) != BTF_KIND_DATASEC)
+			continue;
+
+		rc = btf_fixup_datasec(obj, btf, type);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
int btf__load(struct btf *btf)
{
__u32 log_buf_size = BPF_LOG_BUF_SIZE;
}
/* special BTF "void" type is made canonical immediately */
d->map[0] = 0;
- for (i = 1; i <= btf->nr_types; i++)
- d->map[i] = BTF_UNPROCESSED_ID;
+ for (i = 1; i <= btf->nr_types; i++) {
+ struct btf_type *t = d->btf->types[i];
+ __u16 kind = BTF_INFO_KIND(t->info);
+
+ /* VAR and DATASEC are never deduped and are self-canonical */
+ if (kind == BTF_KIND_VAR || kind == BTF_KIND_DATASEC)
+ d->map[i] = i;
+ else
+ d->map[i] = BTF_UNPROCESSED_ID;
+ }
d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
if (!d->hypot_map) {
case BTF_KIND_UNION:
case BTF_KIND_FUNC:
case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DATASEC:
return 0;
case BTF_KIND_INT:
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
r = btf_dedup_remap_type_id(d, t->type);
if (r < 0)
return r;
break;
}
+ case BTF_KIND_DATASEC: {
+ struct btf_var_secinfo *var = (struct btf_var_secinfo *)(t + 1);
+ __u16 vlen = BTF_INFO_VLEN(t->info);
+
+ for (i = 0; i < vlen; i++) {
+ r = btf_dedup_remap_type_id(d, var->type);
+ if (r < 0)
+ return r;
+ var->type = r;
+ var++;
+ }
+ break;
+ }
+
default:
return -EINVAL;
}
struct btf_ext;
struct btf_type;
+struct bpf_object;
+
/*
* The .BTF.ext ELF section layout defined as
* struct btf_ext_header
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
* Copyright (C) 2015 Huawei Inc.
* Copyright (C) 2017 Nicira, Inc.
+ * Copyright (C) 2019 Isovalent, Inc.
*/
#ifndef _GNU_SOURCE
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
+ * compilation if user enables corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
#define __printf(a, b) __attribute__((format(printf, a, b)))
static int __base_pr(enum libbpf_print_level level, const char *format,
enum {
RELO_LD64,
RELO_CALL,
+ RELO_DATA,
} type;
int insn_idx;
union {
};
} *reloc_desc;
int nr_reloc;
+ int log_level;
struct {
int nr;
__u32 line_info_cnt;
};
+/* Tells where a struct bpf_map comes from. */
+enum libbpf_map_type {
+	LIBBPF_MAP_UNSPEC,	/* map defined in the "maps" ELF section */
+	LIBBPF_MAP_DATA,	/* libbpf internal map for the .data section */
+	LIBBPF_MAP_BSS,		/* libbpf internal map for the .bss section */
+	LIBBPF_MAP_RODATA,	/* libbpf internal map for the .rodata section */
+};
+
+/*
+ * Section name for each internal map type, indexed by enum libbpf_map_type.
+ * It is used as the suffix of the generated map name and as the name looked
+ * up in BTF. LIBBPF_MAP_UNSPEC has no initializer, so that slot is NULL.
+ */
+static const char * const libbpf_type_to_btf_name[] = {
+	[LIBBPF_MAP_DATA] = ".data",
+	[LIBBPF_MAP_BSS] = ".bss",
+	[LIBBPF_MAP_RODATA] = ".rodata",
+};
+
struct bpf_map {
int fd;
char *name;
__u32 btf_value_type_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
+ enum libbpf_map_type libbpf_type;
+};
+
+/*
+ * Heap copies of the contents of the data sections, later written into
+ * the corresponding internal maps. There is no member for .bss.
+ */
+struct bpf_secdata {
+	void *rodata;	/* copy of the .rodata section contents */
+	void *data;	/* copy of the .data section contents */
+};
static LIST_HEAD(bpf_objects_list);
struct bpf_object {
+ char name[BPF_OBJ_NAME_LEN];
char license[64];
__u32 kern_version;
size_t nr_programs;
struct bpf_map *maps;
size_t nr_maps;
+ struct bpf_secdata sections;
bool loaded;
bool has_pseudo_calls;
Elf *elf;
GElf_Ehdr ehdr;
Elf_Data *symbols;
+ Elf_Data *data;
+ Elf_Data *rodata;
+ Elf_Data *bss;
size_t strtabidx;
struct {
GElf_Shdr shdr;
int nr_reloc;
int maps_shndx;
int text_shndx;
+ int data_shndx;
+ int rodata_shndx;
+ int bss_shndx;
} efile;
/*
* All loaded bpf_object is linked in a list, which is
size_t obj_buf_sz)
{
struct bpf_object *obj;
+ char *end;
obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
if (!obj) {
}
strcpy(obj->path, path);
- obj->efile.fd = -1;
+ /* Using basename() GNU version which doesn't modify arg. */
+ strncpy(obj->name, basename((void *)path),
+ sizeof(obj->name) - 1);
+ end = strchr(obj->name, '.');
+ if (end)
+ *end = 0;
+ obj->efile.fd = -1;
/*
* Caller of this function should also calls
* bpf_object__elf_finish() after data collection to return
obj->efile.obj_buf = obj_buf;
obj->efile.obj_buf_sz = obj_buf_sz;
obj->efile.maps_shndx = -1;
+ obj->efile.data_shndx = -1;
+ obj->efile.rodata_shndx = -1;
+ obj->efile.bss_shndx = -1;
obj->loaded = false;
obj->efile.elf = NULL;
}
obj->efile.symbols = NULL;
+ obj->efile.data = NULL;
+ obj->efile.rodata = NULL;
+ obj->efile.bss = NULL;
zfree(&obj->efile.reloc);
obj->efile.nr_reloc = 0;
return false;
}
+/*
+ * Scan the ELF sections of @obj for the one called @name and store the
+ * size of its data in @d_size.
+ *
+ * Returns 0 when the section was found, -EIO when a section header, a
+ * section name or the section data cannot be read, and -ENOENT when no
+ * section has that name.
+ */
+static int bpf_object_search_section_size(const struct bpf_object *obj,
+					  const char *name, size_t *d_size)
+{
+	const GElf_Ehdr *ehdr = &obj->efile.ehdr;
+	Elf *elf = obj->efile.elf;
+	Elf_Scn *scn;
+	int idx = 1;	/* number reported in messages; first section is 1 */
+
+	for (scn = elf_nextscn(elf, NULL); scn != NULL;
+	     scn = elf_nextscn(elf, scn), idx++) {
+		const char *cur_name;
+		Elf_Data *sec_data;
+		GElf_Shdr shdr;
+
+		if (gelf_getshdr(scn, &shdr) != &shdr) {
+			pr_warning("failed to get section(%d) header from %s\n",
+				   idx, obj->path);
+			return -EIO;
+		}
+
+		cur_name = elf_strptr(elf, ehdr->e_shstrndx, shdr.sh_name);
+		if (!cur_name) {
+			pr_warning("failed to get section(%d) name from %s\n",
+				   idx, obj->path);
+			return -EIO;
+		}
+
+		/* Not the section we are after, keep looking. */
+		if (strcmp(name, cur_name) != 0)
+			continue;
+
+		sec_data = elf_getdata(scn, 0);
+		if (!sec_data) {
+			pr_warning("failed to get section(%d) data from %s(%s)\n",
+				   idx, name, obj->path);
+			return -EIO;
+		}
+
+		*d_size = sec_data->d_size;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Report the size of ELF section @name of @obj through @size.
+ *
+ * ".data", ".bss" and ".rodata" are answered from the Elf_Data pointers
+ * cached in obj->efile; any other name triggers a scan of the ELF file.
+ * @size is always written and is 0 on failure.
+ *
+ * Returns 0 on success, -EINVAL if @name is NULL, -ENOENT if one of the
+ * three cached sections is absent or empty, or the error reported by the
+ * section scan.
+ */
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+			     __u32 *size)
+{
+	const Elf_Data *cached;
+	size_t scanned;
+	int err;
+
+	*size = 0;
+	if (!name)
+		return -EINVAL;
+
+	if (strcmp(name, ".data") == 0) {
+		cached = obj->efile.data;
+	} else if (strcmp(name, ".bss") == 0) {
+		cached = obj->efile.bss;
+	} else if (strcmp(name, ".rodata") == 0) {
+		cached = obj->efile.rodata;
+	} else {
+		/* Unknown to the cache: look the section up in the ELF. */
+		err = bpf_object_search_section_size(obj, name, &scanned);
+		if (err)
+			return err;
+		*size = scanned;
+		return 0;
+	}
+
+	if (!cached)
+		return -ENOENT;
+
+	*size = cached->d_size;
+	return *size ? 0 : -ENOENT;
+}
+
+/*
+ * Look up the global object symbol called @name in the ELF symbol table
+ * of @obj and store its st_value in @off.
+ *
+ * Only symbols with STB_GLOBAL binding and STT_OBJECT type are considered.
+ *
+ * Returns 0 on success, -EINVAL if @name or @off is NULL or no symbol
+ * table is available, -EIO if a symbol name cannot be read, and -ENOENT
+ * if no matching symbol exists.
+ */
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+				__u32 *off)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	const char *sname;
+	size_t si, nr_syms;
+
+	if (!name || !off)
+		return -EINVAL;
+	/* obj->efile.symbols is NULL until a symbol table has been seen and
+	 * is reset to NULL elsewhere in this file, so do not dereference it
+	 * blindly.
+	 */
+	if (!symbols)
+		return -EINVAL;
+
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
+	for (si = 0; si < nr_syms; si++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, si, &sym))
+			continue;
+		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+		    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+			continue;
+
+		sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				   sym.st_name);
+		if (!sname) {
+			pr_warning("failed to get sym name string for var %s\n",
+				   name);
+			return -EIO;
+		}
+		if (strcmp(name, sname) == 0) {
+			*off = sym.st_value;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * True when the object has a "maps" section or at least one of the
+ * .data, .rodata and .bss sections, i.e. when any section index recorded
+ * for them is non-negative.
+ */
+static bool bpf_object__has_maps(const struct bpf_object *obj)
+{
+	if (obj->efile.maps_shndx >= 0)
+		return true;
+	if (obj->efile.data_shndx >= 0)
+		return true;
+	if (obj->efile.rodata_shndx >= 0)
+		return true;
+	return obj->efile.bss_shndx >= 0;
+}
+
+/*
+ * Set up @map as a libbpf internal map of kind @type backed by the ELF
+ * section contents in @data.
+ *
+ * The map is a single-entry BPF_MAP_TYPE_ARRAY whose value size equals the
+ * section size. Its name is built from at most 8 characters of the object
+ * name followed by at most 7 characters of the section name. When
+ * @data_buff is not NULL, a heap copy of the section contents is stored
+ * through it.
+ *
+ * Returns 0 on success or -ENOMEM when an allocation fails.
+ */
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
+			      enum libbpf_map_type type, Elf_Data *data,
+			      void **data_buff)
+{
+	char name_buf[BPF_OBJ_NAME_LEN];
+	struct bpf_map_def *mdef = &map->def;
+	void *copy;
+
+	map->libbpf_type = type;
+	/* All bits set: internal maps have no offset in a "maps" section. */
+	map->offset = ~(typeof(map->offset))0;
+
+	snprintf(name_buf, sizeof(name_buf), "%.8s%.7s", obj->name,
+		 libbpf_type_to_btf_name[type]);
+	map->name = strdup(name_buf);
+	if (!map->name) {
+		pr_warning("failed to alloc map name\n");
+		return -ENOMEM;
+	}
+
+	mdef->type = BPF_MAP_TYPE_ARRAY;
+	mdef->key_size = sizeof(int);
+	mdef->value_size = data->d_size;
+	mdef->max_entries = 1;
+	if (type == LIBBPF_MAP_RODATA)
+		mdef->map_flags = BPF_F_RDONLY_PROG;
+	else
+		mdef->map_flags = 0;
+
+	if (data_buff) {
+		copy = malloc(data->d_size);
+		*data_buff = copy;
+		if (!copy) {
+			zfree(&map->name);
+			pr_warning("failed to alloc map content buffer\n");
+			return -ENOMEM;
+		}
+		memcpy(copy, data->d_buf, data->d_size);
+	}
+
+	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
+	return 0;
+}
+
static int
bpf_object__init_maps(struct bpf_object *obj, int flags)
{
+ int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
bool strict = !(flags & MAPS_RELAX_COMPAT);
- int i, map_idx, map_def_sz, nr_maps = 0;
- Elf_Scn *scn;
- Elf_Data *data = NULL;
Elf_Data *symbols = obj->efile.symbols;
+ Elf_Data *data = NULL;
+ int ret = 0;
- if (obj->efile.maps_shndx < 0)
- return -EINVAL;
if (!symbols)
return -EINVAL;
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
- if (!scn || !data) {
- pr_warning("failed to get Elf_Data from map section %d\n",
- obj->efile.maps_shndx);
- return -EINVAL;
+ if (obj->efile.maps_shndx >= 0) {
+ Elf_Scn *scn = elf_getscn(obj->efile.elf,
+ obj->efile.maps_shndx);
+
+ if (scn)
+ data = elf_getdata(scn, NULL);
+ if (!scn || !data) {
+ pr_warning("failed to get Elf_Data from map section %d\n",
+ obj->efile.maps_shndx);
+ return -EINVAL;
+ }
}
/*
*
* TODO: Detect array of map and report error.
*/
- for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ if (obj->efile.data_shndx >= 0)
+ nr_maps_glob++;
+ if (obj->efile.rodata_shndx >= 0)
+ nr_maps_glob++;
+ if (obj->efile.bss_shndx >= 0)
+ nr_maps_glob++;
+ for (i = 0; data && i < nr_syms; i++) {
GElf_Sym sym;
if (!gelf_getsym(symbols, i, &sym))
/* Alloc obj->maps and fill nr_maps. */
pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
nr_maps, data->d_size);
-
- if (!nr_maps)
+ if (!nr_maps && !nr_maps_glob)
return 0;
/* Assume equally sized map definitions */
- map_def_sz = data->d_size / nr_maps;
- if (!data->d_size || (data->d_size % nr_maps) != 0) {
- pr_warning("unable to determine map definition size "
- "section %s, %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
- return -EINVAL;
+ if (data) {
+ map_def_sz = data->d_size / nr_maps;
+ if (!data->d_size || (data->d_size % nr_maps) != 0) {
+ pr_warning("unable to determine map definition size "
+ "section %s, %d maps in %zd bytes\n",
+ obj->path, nr_maps, data->d_size);
+ return -EINVAL;
+ }
}
+ nr_maps += nr_maps_glob;
obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
if (!obj->maps) {
pr_warning("alloc maps for object failed\n");
/*
* Fill obj->maps using data in "maps" section.
*/
- for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ for (i = 0, map_idx = 0; data && i < nr_syms; i++) {
GElf_Sym sym;
const char *map_name;
struct bpf_map_def *def;
map_name = elf_strptr(obj->efile.elf,
obj->efile.strtabidx,
sym.st_name);
+
+ obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
obj->maps[map_idx].offset = sym.st_value;
if (sym.st_value + map_def_sz > data->d_size) {
pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
map_idx++;
}
- qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
- return 0;
+ /*
+ * Populate rest of obj->maps with libbpf internal maps.
+ */
+ if (obj->efile.data_shndx >= 0)
+ ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+ LIBBPF_MAP_DATA,
+ obj->efile.data,
+ &obj->sections.data);
+ if (!ret && obj->efile.rodata_shndx >= 0)
+ ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+ LIBBPF_MAP_RODATA,
+ obj->efile.rodata,
+ &obj->sections.rodata);
+ if (!ret && obj->efile.bss_shndx >= 0)
+ ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+ LIBBPF_MAP_BSS,
+ obj->efile.bss, NULL);
+ if (!ret)
+ qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
+ compare_bpf_map);
+ return ret;
}
static bool section_have_execinstr(struct bpf_object *obj, int idx)
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Data *btf_ext_data = NULL;
+ Elf_Data *btf_data = NULL;
Elf_Scn *scn = NULL;
int idx = 0, err = 0;
(int)sh.sh_link, (unsigned long)sh.sh_flags,
(int)sh.sh_type);
- if (strcmp(name, "license") == 0)
+ if (strcmp(name, "license") == 0) {
err = bpf_object__init_license(obj,
data->d_buf,
data->d_size);
- else if (strcmp(name, "version") == 0)
+ } else if (strcmp(name, "version") == 0) {
err = bpf_object__init_kversion(obj,
data->d_buf,
data->d_size);
- else if (strcmp(name, "maps") == 0)
+ } else if (strcmp(name, "maps") == 0) {
obj->efile.maps_shndx = idx;
- else if (strcmp(name, BTF_ELF_SEC) == 0) {
- obj->btf = btf__new(data->d_buf, data->d_size);
- if (IS_ERR(obj->btf)) {
- pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
- BTF_ELF_SEC, PTR_ERR(obj->btf));
- obj->btf = NULL;
- continue;
- }
- err = btf__load(obj->btf);
- if (err) {
- pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n",
- BTF_ELF_SEC, err);
- btf__free(obj->btf);
- obj->btf = NULL;
- err = 0;
- }
+ } else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ btf_data = data;
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = data;
} else if (sh.sh_type == SHT_SYMTAB) {
obj->efile.symbols = data;
obj->efile.strtabidx = sh.sh_link;
}
- } else if ((sh.sh_type == SHT_PROGBITS) &&
- (sh.sh_flags & SHF_EXECINSTR) &&
- (data->d_size > 0)) {
- if (strcmp(name, ".text") == 0)
- obj->efile.text_shndx = idx;
- err = bpf_object__add_program(obj, data->d_buf,
- data->d_size, name, idx);
- if (err) {
- char errmsg[STRERR_BUFSIZE];
- char *cp = libbpf_strerror_r(-err, errmsg,
- sizeof(errmsg));
-
- pr_warning("failed to alloc program %s (%s): %s",
- name, obj->path, cp);
+ } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
+ if (sh.sh_flags & SHF_EXECINSTR) {
+ if (strcmp(name, ".text") == 0)
+ obj->efile.text_shndx = idx;
+ err = bpf_object__add_program(obj, data->d_buf,
+ data->d_size, name, idx);
+ if (err) {
+ char errmsg[STRERR_BUFSIZE];
+ char *cp = libbpf_strerror_r(-err, errmsg,
+ sizeof(errmsg));
+
+ pr_warning("failed to alloc program %s (%s): %s",
+ name, obj->path, cp);
+ }
+ } else if (strcmp(name, ".data") == 0) {
+ obj->efile.data = data;
+ obj->efile.data_shndx = idx;
+ } else if (strcmp(name, ".rodata") == 0) {
+ obj->efile.rodata = data;
+ obj->efile.rodata_shndx = idx;
+ } else {
+ pr_debug("skip section(%d) %s\n", idx, name);
}
} else if (sh.sh_type == SHT_REL) {
void *reloc = obj->efile.reloc;
obj->efile.reloc[n].shdr = sh;
obj->efile.reloc[n].data = data;
}
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+ obj->efile.bss = data;
+ obj->efile.bss_shndx = idx;
} else {
pr_debug("skip section(%d) %s\n", idx, name);
}
pr_warning("Corrupted ELF file: index of strtab invalid\n");
return LIBBPF_ERRNO__FORMAT;
}
+ if (btf_data) {
+ obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+ if (IS_ERR(obj->btf)) {
+ pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+ BTF_ELF_SEC, PTR_ERR(obj->btf));
+ obj->btf = NULL;
+ } else {
+ err = btf__finalize_data(obj, obj->btf);
+ if (!err)
+ err = btf__load(obj->btf);
+ if (err) {
+ pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n",
+ BTF_ELF_SEC, err);
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ err = 0;
+ }
+ }
+ }
if (btf_ext_data) {
if (!obj->btf) {
pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
}
}
}
- if (obj->efile.maps_shndx >= 0) {
+ if (bpf_object__has_maps(obj)) {
err = bpf_object__init_maps(obj, flags);
if (err)
goto out;
return NULL;
}
+/* True when @shndx is the index recorded for .data, .bss or .rodata. */
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
+				      int shndx)
+{
+	if (shndx == obj->efile.data_shndx)
+		return true;
+	if (shndx == obj->efile.bss_shndx)
+		return true;
+	return shndx == obj->efile.rodata_shndx;
+}
+
+/* True when @shndx is the index recorded for the "maps" section. */
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
+				      int shndx)
+{
+	const int maps_idx = obj->efile.maps_shndx;
+
+	return maps_idx == shndx;
+}
+
+/*
+ * True when @shndx designates a section relocations may point to: the
+ * section recorded as text_shndx, the "maps" section or one of the data
+ * sections.
+ */
+static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
+					      int shndx)
+{
+	if (shndx == obj->efile.text_shndx)
+		return true;
+	if (bpf_object__shndx_is_maps(obj, shndx))
+		return true;
+	return bpf_object__shndx_is_data(obj, shndx);
+}
+
+/*
+ * Map a section index to the internal map type of that section.
+ * Returns LIBBPF_MAP_UNSPEC when @shndx is none of .data, .bss, .rodata.
+ */
+static enum libbpf_map_type
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
+{
+	if (shndx == obj->efile.data_shndx)
+		return LIBBPF_MAP_DATA;
+	if (shndx == obj->efile.bss_shndx)
+		return LIBBPF_MAP_BSS;
+	if (shndx == obj->efile.rodata_shndx)
+		return LIBBPF_MAP_RODATA;
+
+	return LIBBPF_MAP_UNSPEC;
+}
+
static int
bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
Elf_Data *data, struct bpf_object *obj)
{
Elf_Data *symbols = obj->efile.symbols;
- int text_shndx = obj->efile.text_shndx;
- int maps_shndx = obj->efile.maps_shndx;
struct bpf_map *maps = obj->maps;
size_t nr_maps = obj->nr_maps;
int i, nrels;
GElf_Sym sym;
GElf_Rel rel;
unsigned int insn_idx;
+ unsigned int shdr_idx;
struct bpf_insn *insns = prog->insns;
+ enum libbpf_map_type type;
+ const char *name;
size_t map_idx;
if (!gelf_getrel(data, i, &rel)) {
GELF_R_SYM(rel.r_info));
return -LIBBPF_ERRNO__FORMAT;
}
- pr_debug("relo for %lld value %lld name %d\n",
+
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name) ? : "<?>";
+
+ pr_debug("relo for %lld value %lld name %d (\'%s\')\n",
(long long) (rel.r_info >> 32),
- (long long) sym.st_value, sym.st_name);
+ (long long) sym.st_value, sym.st_name, name);
- if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
- pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
- prog->section_name, sym.st_shndx);
+ shdr_idx = sym.st_shndx;
+ if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
+ pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
+ prog->section_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
return -LIBBPF_ERRNO__RELOC;
}
- /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
- for (map_idx = 0; map_idx < nr_maps; map_idx++) {
- if (maps[map_idx].offset == sym.st_value) {
- pr_debug("relocation: find map %zd (%s) for insn %u\n",
- map_idx, maps[map_idx].name, insn_idx);
- break;
+ if (bpf_object__shndx_is_maps(obj, shdr_idx) ||
+ bpf_object__shndx_is_data(obj, shdr_idx)) {
+ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+ if (type != LIBBPF_MAP_UNSPEC &&
+ GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
+ pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
+ name, insn_idx, insns[insn_idx].code);
+ return -LIBBPF_ERRNO__RELOC;
}
- }
- if (map_idx >= nr_maps) {
- pr_warning("bpf relocation: map_idx %d large than %d\n",
- (int)map_idx, (int)nr_maps - 1);
- return -LIBBPF_ERRNO__RELOC;
- }
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ if (maps[map_idx].libbpf_type != type)
+ continue;
+ if (type != LIBBPF_MAP_UNSPEC ||
+ (type == LIBBPF_MAP_UNSPEC &&
+ maps[map_idx].offset == sym.st_value)) {
+ pr_debug("relocation: find map %zd (%s) for insn %u\n",
+ map_idx, maps[map_idx].name, insn_idx);
+ break;
+ }
+ }
+
+ if (map_idx >= nr_maps) {
+ pr_warning("bpf relocation: map_idx %d large than %d\n",
+ (int)map_idx, (int)nr_maps - 1);
+ return -LIBBPF_ERRNO__RELOC;
+ }
- prog->reloc_desc[i].type = RELO_LD64;
- prog->reloc_desc[i].insn_idx = insn_idx;
- prog->reloc_desc[i].map_idx = map_idx;
+ prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ?
+ RELO_DATA : RELO_LD64;
+ prog->reloc_desc[i].insn_idx = insn_idx;
+ prog->reloc_desc[i].map_idx = map_idx;
+ }
}
return 0;
}
static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
{
struct bpf_map_def *def = &map->def;
- __u32 key_type_id, value_type_id;
+ __u32 key_type_id = 0, value_type_id = 0;
int ret;
- ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
- def->value_size, &key_type_id,
- &value_type_id);
- if (ret)
+ if (!bpf_map__is_internal(map)) {
+ ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
+ def->value_size, &key_type_id,
+ &value_type_id);
+ } else {
+ /*
+ * LLVM annotates global data differently in BTF, that is,
+ * only as '.data', '.bss' or '.rodata'.
+ */
+ ret = btf__find_by_name(btf,
+ libbpf_type_to_btf_name[map->libbpf_type]);
+ }
+ if (ret < 0)
return ret;
map->btf_key_type_id = key_type_id;
- map->btf_value_type_id = value_type_id;
-
+ map->btf_value_type_id = bpf_map__is_internal(map) ?
+ ret : value_type_id;
return 0;
}
return bpf_object__probe_name(obj);
}
+/*
+ * Write the initial contents of an internal map into the kernel map.
+ *
+ * .bss maps are left alone. For .data and .rodata, the copy kept in
+ * obj->sections is stored at key 0. A .rodata map is additionally frozen;
+ * a failure to freeze is only warned about and does not fail the call.
+ *
+ * Returns 0 on success or the result of bpf_map_update_elem() on failure.
+ */
+static int
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
+{
+	char errmsg[STRERR_BUFSIZE];
+	int key = 0, err;
+	__u8 *init_val;
+
+	/* Nothing to do here since kernel already zero-initializes .bss map. */
+	if (map->libbpf_type == LIBBPF_MAP_BSS)
+		return 0;
+
+	if (map->libbpf_type == LIBBPF_MAP_DATA)
+		init_val = obj->sections.data;
+	else
+		init_val = obj->sections.rodata;
+
+	err = bpf_map_update_elem(map->fd, &key, init_val, 0);
+	if (err)
+		return err;
+	if (map->libbpf_type != LIBBPF_MAP_RODATA)
+		return 0;
+
+	/* Freeze .rodata map as read-only from syscall side. */
+	if (bpf_map_freeze(map->fd)) {
+		char *cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+
+		pr_warning("Error freezing map(%s) as read-only: %s\n",
+			   map->name, cp);
+	}
+
+	return 0;
+}
+
static int
bpf_object__create_maps(struct bpf_object *obj)
{
size_t j;
err = *pfd;
+err_out:
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
map->name, cp);
zclose(obj->maps[j].fd);
return err;
}
+
+ if (bpf_map__is_internal(map)) {
+ err = bpf_object__populate_internal_map(obj, map);
+ if (err < 0) {
+ zclose(*pfd);
+ goto err_out;
+ }
+ }
+
pr_debug("create map %s: fd=%d\n", map->name, *pfd);
}
return 0;
for (i = 0; i < prog->nr_reloc; i++) {
- if (prog->reloc_desc[i].type == RELO_LD64) {
+ if (prog->reloc_desc[i].type == RELO_LD64 ||
+ prog->reloc_desc[i].type == RELO_DATA) {
+ bool relo_data = prog->reloc_desc[i].type == RELO_DATA;
struct bpf_insn *insns = prog->insns;
int insn_idx, map_idx;
insn_idx = prog->reloc_desc[i].insn_idx;
map_idx = prog->reloc_desc[i].map_idx;
- if (insn_idx >= (int)prog->insns_cnt) {
+ if (insn_idx + 1 >= (int)prog->insns_cnt) {
pr_warning("relocation out of range: '%s'\n",
prog->section_name);
return -LIBBPF_ERRNO__RELOC;
}
- insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+
+ if (!relo_data) {
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ } else {
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insns[insn_idx + 1].imm = insns[insn_idx].imm;
+ }
insns[insn_idx].imm = obj->maps[map_idx].fd;
- } else {
+ } else if (prog->reloc_desc[i].type == RELO_CALL) {
err = bpf_program__reloc_text(prog, obj,
&prog->reloc_desc[i]);
if (err)
{
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
+ int log_buf_size = BPF_LOG_BUF_SIZE;
char *log_buf;
int ret;
load_attr.line_info = prog->line_info;
load_attr.line_info_rec_size = prog->line_info_rec_size;
load_attr.line_info_cnt = prog->line_info_cnt;
+ load_attr.log_level = prog->log_level;
if (!load_attr.insns || !load_attr.insns_cnt)
return -EINVAL;
- log_buf = malloc(BPF_LOG_BUF_SIZE);
+retry_load:
+ log_buf = malloc(log_buf_size);
if (!log_buf)
pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
- ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
if (ret >= 0) {
+ if (load_attr.log_level)
+ pr_debug("verifier log:\n%s", log_buf);
*pfd = ret;
ret = 0;
goto out;
}
+ if (errno == ENOSPC) {
+ log_buf_size <<= 1;
+ free(log_buf);
+ goto retry_load;
+ }
ret = -LIBBPF_ERRNO__LOAD;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
return false;
case BPF_PROG_TYPE_KPROBE:
default:
obj->maps[i].priv = NULL;
obj->maps[i].clear_priv = NULL;
}
+
+ zfree(&obj->sections.rodata);
+ zfree(&obj->sections.data);
zfree(&obj->maps);
obj->nr_maps = 0;
BPF_CGROUP_UDP4_SENDMSG),
BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_CGROUP_UDP6_SENDMSG),
+ BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
+ BPF_CGROUP_SYSCTL),
};
#undef BPF_PROG_SEC_IMPL
return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}
+/* Report whether libbpf tagged this map with a type other than LIBBPF_MAP_UNSPEC. */
+bool bpf_map__is_internal(struct bpf_map *map)
+{
+ if (map->libbpf_type == LIBBPF_MAP_UNSPEC)
+ return false;
+ return true;
+}
+
void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
map->map_ifindex = ifindex;
bpf_program__set_expected_attach_type(prog,
expected_attach_type);
+ prog->log_level = attr->log_level;
if (!first_prog)
first_prog = prog;
}
LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
size_t obj_buf_sz,
const char *name);
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off);
LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map);
LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
int ifindex;
+ int log_level;
};
LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
bpf_program__bpil_addr_to_offs;
bpf_program__bpil_offs_to_addr;
} LIBBPF_0.0.1;
+
+LIBBPF_0.0.3 {
+ global:
+ bpf_map__is_internal;
+ bpf_map_freeze;
+ btf__finalize_data;
+} LIBBPF_0.0.2;
--- /dev/null
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libbpf
+Description: BPF library
+Version: @VERSION@
+Libs: -L${libdir} -lbpf
+Requires.private: libelf
+Cflags: -I${includedir}
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
default:
break;
}
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+/* Use these barrier functions instead of smp_[rw]mb() when they are
+ * used in a libbpf header file. That way they can be built into the
+ * application that uses libbpf.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+/* Compiler-only barriers: an empty asm with a "memory" clobber. */
+# define libbpf_smp_rmb() asm volatile("" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("" : : : "memory")
+/* Full barrier via a locked no-op add just below the stack pointer.
+ * The stack pointer is %rsp only on x86_64; 32-bit x86 must use %esp,
+ * otherwise the instruction does not assemble.
+ */
+# if defined(__x86_64__)
+#  define libbpf_smp_mb() \
+ asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
+# else
+#  define libbpf_smp_mb() \
+ asm volatile("lock; addl $0,-4(%%esp)" : : : "memory", "cc")
+# endif
+/* Hinders stores to be observed before older loads. */
+# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+#elif defined(__aarch64__)
+# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#elif defined(__arm__)
+/* These are only valid for armv7 and above */
+# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#else
+/* Architecture missing native barrier functions. */
+# define libbpf_smp_rmb() __sync_synchronize()
+# define libbpf_smp_wmb() __sync_synchronize()
+# define libbpf_smp_mb() __sync_synchronize()
+# define libbpf_smp_rwmb() __sync_synchronize()
+#endif
+
#ifdef __cplusplus
} /* extern "C" */
#endif
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
- char bpf_log_buf[BPF_LOG_BUF_SIZE];
+ static const int log_buf_size = 16 * 1024;
+ char log_buf[log_buf_size];
int err, prog_fd;
/* This is the C-program:
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
- "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf,
- BPF_LOG_BUF_SIZE);
+ "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+ log_buf_size);
if (prog_fd < 0) {
- pr_warning("BPF log buffer:\n%s", bpf_log_buf);
+ pr_warning("BPF log buffer:\n%s", log_buf);
return prog_fd;
}
#include <linux/if_xdp.h>
#include "libbpf.h"
+#include "libbpf_util.h"
#ifdef __cplusplus
extern "C" {
DEFINE_XSK_RING(xsk_ring_prod);
DEFINE_XSK_RING(xsk_ring_cons);
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
struct xsk_umem;
struct xsk_socket;
static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
size_t nb, __u32 *idx)
{
- if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
+ if (xsk_prod_nb_free(prod, nb) < nb)
return 0;
*idx = prod->cached_prod;
static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
{
- /* Make sure everything has been written to the ring before signalling
- * this to the kernel.
+ /* Make sure everything has been written to the ring before indicating
+ * this to the kernel by writing the producer pointer.
*/
- smp_wmb();
+ libbpf_smp_wmb();
*prod->producer += nb;
}
{
size_t entries = xsk_cons_nb_avail(cons, nb);
- if (likely(entries > 0)) {
+ if (entries > 0) {
/* Make sure we do not speculatively read the data before
* we have received the packet buffers from the ring.
*/
- smp_rmb();
+ libbpf_smp_rmb();
*idx = cons->cached_cons;
cons->cached_cons += entries;
static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
{
+ /* Make sure data has been read before indicating we are done
+ * with the entries by updating the consumer pointer.
+ */
+ libbpf_smp_rwmb();
+
*cons->consumer += nb;
}
return val & 0xffffffff;
if (strcmp(type, "u64") == 0 ||
- strcmp(type, "s64"))
+ strcmp(type, "s64") == 0)
return val;
if (strcmp(type, "s8") == 0)
"fortify_panic",
"usercopy_abort",
"machine_real_restart",
+ "rewind_stack_do_exit",
};
if (func->bind == STB_WEAK)
for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
perf_header__set_feat(&session->header, feat);
+ perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
* */
.overwrite = 0,
.sample_time = true,
+ .sample_time_set = true,
},
.max_stack = sysctl__max_stack(),
.annotation_opts = annotation__default_options,
'return_id,'
'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,'
'parent_call_path_id,'
- 'parent_id'
+ 'calls.parent_id'
' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
do_query(query, 'CREATE VIEW samples_view AS '
else if (prog_id > node->info_linear->info.id)
n = n->rb_right;
else
- break;
+ goto out;
}
+ node = NULL;
+out:
up_read(&env->bpf_progs.lock);
return node;
}
else if (btf_id > node->id)
n = n->rb_right;
else
- break;
+ goto out;
}
+ node = NULL;
up_read(&env->bpf_progs.lock);
+out:
return node;
}
{
struct perf_evlist *evlist = arg;
bool draining = false;
- int i;
+ int i, done = 0;
+
+ while (!done) {
+ bool got_data = false;
- while (draining || !(evlist->thread.done)) {
- if (draining)
- draining = false;
- else if (evlist->thread.done)
+ if (evlist->thread.done)
draining = true;
if (!draining)
pr_warning("cannot locate proper evsel for the side band event\n");
perf_mmap__consume(map);
+ got_data = true;
}
perf_mmap__read_done(map);
}
+
+ if (draining && !got_data)
+ break;
}
return NULL;
}
if (data->user_regs.abi) {
u64 mask = evsel->attr.sample_regs_user;
- sz = hweight_long(mask) * sizeof(u64);
+ sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->user_regs.mask = mask;
data->user_regs.regs = (u64 *)array;
if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
u64 mask = evsel->attr.sample_regs_intr;
- sz = hweight_long(mask) * sizeof(u64);
+ sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->intr_regs.mask = mask;
data->intr_regs.regs = (u64 *)array;
if (type & PERF_SAMPLE_REGS_USER) {
if (sample->user_regs.abi) {
result += sizeof(u64);
- sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
if (type & PERF_SAMPLE_REGS_INTR) {
if (sample->intr_regs.abi) {
result += sizeof(u64);
- sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
if (type & PERF_SAMPLE_REGS_USER) {
if (sample->user_regs.abi) {
*array++ = sample->user_regs.abi;
- sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
memcpy(array, sample->user_regs.regs, sz);
array = (void *)array + sz;
} else {
if (type & PERF_SAMPLE_REGS_INTR) {
if (sample->intr_regs.abi) {
*array++ = sample->intr_regs.abi;
- sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
memcpy(array, sample->intr_regs.regs, sz);
array = (void *)array + sz;
} else {
perf_env__insert_bpf_prog_info(env, info_node);
}
+ up_write(&env->bpf_progs.lock);
return 0;
out:
free(info_linear);
static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_env *env = &ff->ph->env;
+ struct btf_node *node = NULL;
u32 count, i;
+ int err = -1;
if (ff->ph->needs_swap) {
pr_warning("interpreting btf from systems with endianity is not yet supported\n");
down_write(&env->bpf_progs.lock);
for (i = 0; i < count; ++i) {
- struct btf_node *node;
u32 id, data_size;
if (do_read_u32(ff, &id))
- return -1;
+ goto out;
if (do_read_u32(ff, &data_size))
- return -1;
+ goto out;
node = malloc(sizeof(struct btf_node) + data_size);
if (!node)
- return -1;
+ goto out;
node->id = id;
node->data_size = data_size;
- if (__do_read(ff, node->data, data_size)) {
- free(node);
- return -1;
- }
+ if (__do_read(ff, node->data, data_size))
+ goto out;
perf_env__insert_btf(env, node);
+ node = NULL;
}
+ err = 0;
+out:
up_write(&env->bpf_progs.lock);
- return 0;
+ free(node);
+ return err;
}
struct feature_ops {
return kmap && kmap->name[0];
}
+/*
+ * Tell whether a map refers to a BPF program, judging by the binary type
+ * of its dso or, failing that, by the dso's short name.
+ */
+bool __map__is_bpf_prog(const struct map *map)
+{
+ static const char prefix[] = "bpf_prog_";
+ const struct dso *dso = map->dso;
+
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return true;
+
+ /*
+ * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+ * guess the type based on name: it must start with the prefix.
+ */
+ if (!dso->short_name)
+ return false;
+ return strncmp(dso->short_name, prefix, sizeof(prefix) - 1) == 0;
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
rc = strcmp(m->dso->short_name, map->dso->short_name);
if (rc < 0)
p = &(*p)->rb_left;
- else if (rc > 0)
- p = &(*p)->rb_right;
else
- return;
+ p = &(*p)->rb_right;
}
rb_link_node(&map->rb_node_name, parent, p);
rb_insert_color(&map->rb_node_name, &maps->names);
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);
static inline bool __map__is_kmodule(const struct map *map)
{
- return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+ !__map__is_bpf_prog(map);
}
bool map__has_symbols(const struct map *map);
struct nfit_test_sec {
u8 state;
u8 ext_state;
+ u8 old_state;
u8 passphrase[32];
u8 master_passphrase[32];
u64 overwrite_end_time;
static struct gen_pool *nfit_pool;
+static const char zero_key[NVDIMM_PASSPHRASE_LEN];
+
static struct nfit_test *to_nfit_test(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct device *dev = &t->pdev.dev;
struct nfit_test_sec *sec = &dimm_sec_info[dimm];
- if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) ||
- (sec->state & ND_INTEL_SEC_STATE_FROZEN)) {
+ if (sec->state & ND_INTEL_SEC_STATE_FROZEN) {
nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
dev_dbg(dev, "secure erase: wrong security state\n");
} else if (memcmp(nd_cmd->passphrase, sec->passphrase,
nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
dev_dbg(dev, "secure erase: wrong passphrase\n");
} else {
+ if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED)
+ && (memcmp(nd_cmd->passphrase, zero_key,
+ ND_INTEL_PASSPHRASE_SIZE) != 0)) {
+ dev_dbg(dev, "invalid zero key\n");
+ return 0;
+ }
memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
sec->state = 0;
return 0;
}
- memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ sec->old_state = sec->state;
sec->state = ND_INTEL_SEC_STATE_OVERWRITE;
dev_dbg(dev, "overwrite progressing.\n");
sec->overwrite_end_time = get_jiffies_64() + 5 * HZ;
if (time_is_before_jiffies64(sec->overwrite_end_time)) {
sec->overwrite_end_time = 0;
- sec->state = 0;
+ sec->state = sec->old_state;
+ sec->old_state = 0;
sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
dev_dbg(dev, "overwrite is complete\n");
} else
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
- test_netcnt test_tcpnotify_user test_sock_fields
+ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
+$(OUTPUT)/test_sysctl: cgroup_helpers.c
.PHONY: force
endif
PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h
-$(OUTPUT)/test_progs: $(PROG_TESTS_H)
+test_progs.c: $(PROG_TESTS_H)
$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
$(OUTPUT)/test_progs: prog_tests/*.c
) > $(PROG_TESTS_H))
VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h
-$(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H)
+test_verifier.c: $(VERIFIER_TESTS_H)
$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
VERIFIER_TESTS_DIR = $(OUTPUT)/verifier
#define SEC(NAME) __attribute__((section(NAME), used))
/* helper functions called from eBPF programs written in C */
-static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
(void *) BPF_FUNC_map_lookup_elem;
-static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+static int (*bpf_map_update_elem)(void *map, const void *key, const void *value,
unsigned long long flags) =
(void *) BPF_FUNC_map_update_elem;
-static int (*bpf_map_delete_elem)(void *map, void *key) =
+static int (*bpf_map_delete_elem)(void *map, const void *key) =
(void *) BPF_FUNC_map_delete_elem;
-static int (*bpf_map_push_elem)(void *map, void *value,
+static int (*bpf_map_push_elem)(void *map, const void *value,
unsigned long long flags) =
(void *) BPF_FUNC_map_push_elem;
static int (*bpf_map_pop_elem)(void *map, void *value) =
static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
void *ip, int ip_len, void *tcp, int tcp_len) =
(void *) BPF_FUNC_tcp_check_syncookie;
+static int (*bpf_sysctl_get_name)(void *ctx, char *buf,
+ unsigned long long buf_len,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sysctl_get_name;
+static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf,
+ unsigned long long buf_len) =
+ (void *) BPF_FUNC_sysctl_get_current_value;
+static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf,
+ unsigned long long buf_len) =
+ (void *) BPF_FUNC_sysctl_get_new_value;
+static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf,
+ unsigned long long buf_len) =
+ (void *) BPF_FUNC_sysctl_set_new_value;
+static int (*bpf_strtol)(const char *buf, unsigned long long buf_len,
+ unsigned long long flags, long *res) =
+ (void *) BPF_FUNC_strtol;
+static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len,
+ unsigned long long flags, unsigned long *res) =
+ (void *) BPF_FUNC_strtoul;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
CONFIG_FTRACE_SYSCALLS=y
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_GRE=y
+CONFIG_NET_FOU=m
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IPV6_FOU=m
+CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
sprintf(command, "rm -r %s", cfg_pin_path);
ret = system(command);
if (ret)
- error(1, errno, command);
+ error(1, errno, "%s", command);
}
static void parse_opts(int argc, char **argv)
info_len != sizeof(struct bpf_map_info) ||
strcmp((char *)map_infos[i].name, expected_map_name),
"get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+ "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
err, errno,
map_infos[i].type, BPF_MAP_TYPE_ARRAY,
info_len, sizeof(struct bpf_map_info),
*(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
strcmp((char *)prog_infos[i].name, expected_prog_name),
"get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+ "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
memcmp(&prog_info, &prog_infos[i], info_len) ||
*(int *)(long)prog_info.map_ids != saved_map_id,
"get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
+ "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
err, errno, info_len, sizeof(struct bpf_prog_info),
memcmp(&prog_info, &prog_infos[i], info_len),
*(int *)(long)prog_info.map_ids, saved_map_id);
memcmp(&map_info, &map_infos[i], info_len) ||
array_value != array_magic_value,
"check get-map-info(next_id->fd)",
- "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
+ "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
err, errno, info_len, sizeof(struct bpf_map_info),
memcmp(&map_info, &map_infos[i], info_len),
array_value, array_magic_value);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <test_progs.h>
+/*
+ * libbpf print callback: emit only debug-level messages whose format
+ * contains "verifier log", and drop everything else (returning 0).
+ */
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_DEBUG || !strstr(format, "verifier log"))
+ return 0;
+
+ /*
+ * The caller's format is deliberately not reused: only the first
+ * variadic argument is printed, as a string.
+ * NOTE(review): relies on the matched message carrying the log text
+ * as its first argument - confirm against the libbpf pr_debug() call.
+ */
+ return vfprintf(stderr, "%s", args);
+}
+
+/*
+ * Load the BPF object in @file as a SCHED_CLS program with log_level 4,
+ * then close it again. Bumps error_cnt and returns the non-zero error
+ * from bpf_prog_load_xattr() on failure, 0 on success.
+ */
+static int check_load(const char *file)
+{
+ struct bpf_prog_load_attr attr;
+ /*
+ * Start from NULL: if the load fails before an object is stored,
+ * bpf_object__close() below must not see an indeterminate pointer.
+ */
+ struct bpf_object *obj = NULL;
+ int err, prog_fd;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+ attr.log_level = 4;
+ err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ bpf_object__close(obj);
+ if (err)
+ error_cnt++;
+ return err;
+}
+
+/*
+ * Load each verifier-scale test object in turn and print one overall
+ * OK/FAIL line. When verifier_stats is set, libbpf output is routed
+ * through libbpf_debug_print() first.
+ */
+void test_bpf_verif_scale(void)
+{
+ static const char * const objs[] = {
+ "./test_verif_scale1.o",
+ "./test_verif_scale2.o",
+ "./test_verif_scale3.o",
+ };
+ size_t k;
+ int err = 0;
+
+ if (verifier_stats)
+ libbpf_set_print(libbpf_debug_print);
+
+ /* Keep going after a failure so every object is attempted. */
+ for (k = 0; k < sizeof(objs) / sizeof(objs[0]); k++)
+ err |= check_load(objs[k]);
+
+ if (err)
+ printf("test_verif_scale:FAIL\n");
+ else
+ printf("test_verif_scale:OK\n");
+}
#include <test_progs.h>
#define CHECK_FLOW_KEYS(desc, got, expected) \
- CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \
+ CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
desc, \
"nhoff=%u/%u " \
"thoff=%u/%u " \
"is_frag=%u/%u " \
"is_first_frag=%u/%u " \
"is_encap=%u/%u " \
+ "ip_proto=0x%x/0x%x " \
"n_proto=0x%x/0x%x " \
"sport=%u/%u " \
"dport=%u/%u\n", \
got.is_frag, expected.is_frag, \
got.is_first_frag, expected.is_first_frag, \
got.is_encap, expected.is_encap, \
+ got.ip_proto, expected.ip_proto, \
got.n_proto, expected.n_proto, \
got.sport, expected.sport, \
got.dport, expected.dport)
-static struct bpf_flow_keys pkt_v4_flow_keys = {
- .nhoff = 0,
- .thoff = sizeof(struct iphdr),
- .addr_proto = ETH_P_IP,
- .ip_proto = IPPROTO_TCP,
- .n_proto = __bpf_constant_htons(ETH_P_IP),
-};
-
-static struct bpf_flow_keys pkt_v6_flow_keys = {
- .nhoff = 0,
- .thoff = sizeof(struct ipv6hdr),
- .addr_proto = ETH_P_IPV6,
- .ip_proto = IPPROTO_TCP,
- .n_proto = __bpf_constant_htons(ETH_P_IPV6),
-};
-
-#define VLAN_HLEN 4
+struct ipv4_pkt {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct tcphdr tcp;
+} __packed;
-static struct {
+struct svlan_ipv4_pkt {
struct ethhdr eth;
__u16 vlan_tci;
__u16 vlan_proto;
struct iphdr iph;
struct tcphdr tcp;
-} __packed pkt_vlan_v4 = {
- .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q),
- .vlan_proto = __bpf_constant_htons(ETH_P_IP),
- .iph.ihl = 5,
- .iph.protocol = IPPROTO_TCP,
- .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
- .tcp.urg_ptr = 123,
- .tcp.doff = 5,
-};
+} __packed;
-static struct bpf_flow_keys pkt_vlan_v4_flow_keys = {
- .nhoff = VLAN_HLEN,
- .thoff = VLAN_HLEN + sizeof(struct iphdr),
- .addr_proto = ETH_P_IP,
- .ip_proto = IPPROTO_TCP,
- .n_proto = __bpf_constant_htons(ETH_P_IP),
-};
+struct ipv6_pkt {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct tcphdr tcp;
+} __packed;
-static struct {
+struct dvlan_ipv6_pkt {
struct ethhdr eth;
__u16 vlan_tci;
__u16 vlan_proto;
__u16 vlan_proto2;
struct ipv6hdr iph;
struct tcphdr tcp;
-} __packed pkt_vlan_v6 = {
- .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD),
- .vlan_proto = __bpf_constant_htons(ETH_P_8021Q),
- .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6),
- .iph.nexthdr = IPPROTO_TCP,
- .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
- .tcp.urg_ptr = 123,
- .tcp.doff = 5,
+} __packed;
+
+struct test {
+ const char *name;
+ union {
+ struct ipv4_pkt ipv4;
+ struct svlan_ipv4_pkt svlan_ipv4;
+ struct ipv6_pkt ipv6;
+ struct dvlan_ipv6_pkt dvlan_ipv6;
+ } pkt;
+ struct bpf_flow_keys keys;
};
-static struct bpf_flow_keys pkt_vlan_v6_flow_keys = {
- .nhoff = VLAN_HLEN * 2,
- .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
- .addr_proto = ETH_P_IPV6,
- .ip_proto = IPPROTO_TCP,
- .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+#define VLAN_HLEN 4
+
+struct test tests[] = {
+ {
+ .name = "ipv4",
+ .pkt.ipv4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.doff = 5,
+ },
+ .keys = {
+ .nhoff = 0,
+ .thoff = sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ },
+ },
+ {
+ .name = "ipv6",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.doff = 5,
+ },
+ .keys = {
+ .nhoff = 0,
+ .thoff = sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ },
+ },
+ {
+ .name = "802.1q-ipv4",
+ .pkt.svlan_ipv4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q),
+ .vlan_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.doff = 5,
+ },
+ .keys = {
+ .nhoff = VLAN_HLEN,
+ .thoff = VLAN_HLEN + sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ },
+ },
+ {
+ .name = "802.1ad-ipv6",
+ .pkt.dvlan_ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD),
+ .vlan_proto = __bpf_constant_htons(ETH_P_8021Q),
+ .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.doff = 5,
+ },
+ .keys = {
+ .nhoff = VLAN_HLEN * 2,
+ .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ },
+ },
};
void test_flow_dissector(void)
{
- struct bpf_flow_keys flow_keys;
struct bpf_object *obj;
- __u32 duration, retval;
- int err, prog_fd;
- __u32 size;
+ int i, err, prog_fd;
err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
"jmp_table", &prog_fd);
return;
}
- err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv4",
- "err %d errno %d retval %d duration %d size %u/%lu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
- CHECK_FLOW_KEYS("ipv4_flow_keys", flow_keys, pkt_v4_flow_keys);
-
- err = bpf_prog_test_run(prog_fd, 10, &pkt_v6, sizeof(pkt_v6),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv6",
- "err %d errno %d retval %d duration %d size %u/%lu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
- CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys);
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct bpf_flow_keys flow_keys;
+ struct bpf_prog_test_run_attr tattr = {
+ .prog_fd = prog_fd,
+ .data_in = &tests[i].pkt,
+ .data_size_in = sizeof(tests[i].pkt),
+ .data_out = &flow_keys,
+ };
- err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4",
- "err %d errno %d retval %d duration %d size %u/%lu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
- CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys,
- pkt_vlan_v4_flow_keys);
-
- err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6",
- "err %d errno %d retval %d duration %d size %u/%lu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
- CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys,
- pkt_vlan_v6_flow_keys);
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
+ err || tattr.retval != 1,
+ tests[i].name,
+ "err %d errno %d retval %d duration %d size %u/%lu\n",
+ err, errno, tattr.retval, tattr.duration,
+ tattr.data_size_out, sizeof(flow_keys));
+ CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+ }
bpf_object__close(obj);
}
} else {
for (i = 0; i < num_stack; i++) {
ks = ksym_search(raw_data[i]);
- if (strcmp(ks->name, nonjit_func) == 0) {
+ if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
found = true;
break;
}
} else {
for (i = 0; i < num_stack; i++) {
ks = ksym_search(e->kern_stack[i]);
- if (strcmp(ks->name, nonjit_func) == 0) {
+ if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
good_kern_stack = true;
break;
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+/*
+ * Check the 64-bit values stored in the "result_number" map.
+ *
+ * @obj:      loaded BPF object that owns the map
+ * @duration: not referenced by name in this function; presumably consumed
+ *            by the CHECK() macro (TODO confirm against test_progs.h)
+ *
+ * Every table entry is looked up by key and compared with the expected
+ * number.  Lookup errors and mismatches are reported through CHECK(); a
+ * missing map bumps error_cnt and skips all checks.
+ */
+static void test_global_data_number(struct bpf_object *obj, __u32 duration)
+{
+	const struct {
+		const char *name;
+		uint32_t key;
+		uint64_t num;
+	} tests[] = {
+		{ "relocate .bss reference",     0, 0 },
+		{ "relocate .data reference",    1, 42 },
+		{ "relocate .rodata reference",  2, 24 },
+		{ "relocate .bss reference",     3, 0 },
+		{ "relocate .data reference",    4, 0xffeeff },
+		{ "relocate .rodata reference",  5, 0xabab },
+		/* Key 6 holds num4 after a runtime store; num4 is initialised data. */
+		{ "relocate .data reference",    6, 1234 },
+		{ "relocate .bss reference",     7, 0 },
+		{ "relocate .rodata reference",  8, 0xab },
+		{ "relocate .rodata reference",  9, 0x1111111111111111ULL },
+		{ "relocate .rodata reference", 10, ~0ULL },
+	};
+	int err, map_fd;
+	size_t i;
+
+	map_fd = bpf_find_map(__func__, obj, "result_number");
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		/* Known start value: a failed lookup must not print garbage. */
+		uint64_t num = 0;
+
+		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
+		/* Cast for printf: uint64_t is not "unsigned long" everywhere. */
+		CHECK(err || num != tests[i].num, tests[i].name,
+		      "err %d result %llx expected %llx\n",
+		      err, (unsigned long long)num,
+		      (unsigned long long)tests[i].num);
+	}
+}
+
+/*
+ * Check the 32-byte strings stored in the "result_string" map.
+ *
+ * @obj:      loaded BPF object that owns the map
+ * @duration: not referenced by name in this function; presumably consumed
+ *            by the CHECK() macro (TODO confirm against test_progs.h)
+ *
+ * The whole 32-byte value is compared with memcmp(), so bytes after an
+ * embedded NUL (see the "\0\0hello" case) are checked as well.
+ */
+static void test_global_data_string(struct bpf_object *obj, __u32 duration)
+{
+	const struct {
+		const char *name;
+		uint32_t key;
+		char str[32];
+	} tests[] = {
+		{ "relocate .rodata reference", 0, "abcdefghijklmnopqrstuvwxyz" },
+		{ "relocate .data reference",   1, "abcdefghijklmnopqrstuvwxyz" },
+		{ "relocate .bss reference",    2, "" },
+		{ "relocate .data reference",   3, "abcdexghijklmnopqrstuvwxyz" },
+		{ "relocate .bss reference",    4, "\0\0hello" },
+	};
+	int err, map_fd;
+	size_t i;
+
+	map_fd = bpf_find_map(__func__, obj, "result_string");
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		/*
+		 * Zero-fill first: if the lookup fails the buffer is still
+		 * printed with %s and must be a terminated string.
+		 */
+		char str[32] = "";
+
+		err = bpf_map_lookup_elem(map_fd, &tests[i].key, str);
+		CHECK(err || memcmp(str, tests[i].str, sizeof(str)),
+		      tests[i].name, "err %d result \'%s\' expected \'%s\'\n",
+		      err, str, tests[i].str);
+	}
+}
+
+/*
+ * Value type read back from the "result_struct" map by
+ * test_global_data_struct().
+ * NOTE(review): the BPF program declares a struct foo with the same members;
+ * the two presumably have to be kept in sync - confirm when editing either.
+ */
+struct foo {
+ __u8 a;
+ __u32 b;
+ __u64 c;
+};
+
+/*
+ * Check the struct foo values stored in the "result_struct" map.
+ *
+ * @obj:      loaded BPF object that owns the map
+ * @duration: not referenced by name in this function; presumably consumed
+ *            by the CHECK() macro (TODO confirm against test_progs.h)
+ *
+ * Members are compared one by one.  struct foo may contain padding between
+ * its members, and padding bytes of the expected values have no defined
+ * content, so a memcmp() over the whole object is not a reliable comparison.
+ */
+static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
+{
+	const struct {
+		const char *name;
+		uint32_t key;
+		struct foo val;
+	} tests[] = {
+		{ "relocate .rodata reference", 0, { 42, 0xfefeefef, 0x1111111111111111ULL, } },
+		{ "relocate .bss reference",    1, { 0 } },
+		{ "relocate .rodata reference", 2, { 0 } },
+		{ "relocate .data reference",   3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } },
+	};
+	int err, map_fd, mismatch;
+	size_t i;
+
+	map_fd = bpf_find_map(__func__, obj, "result_struct");
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		/* Known start value: a failed lookup must not print garbage. */
+		struct foo val = { 0 };
+
+		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val);
+		mismatch = val.a != tests[i].val.a ||
+			   val.b != tests[i].val.b ||
+			   val.c != tests[i].val.c;
+		CHECK(err || mismatch,
+		      tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n",
+		      err, (unsigned int)val.a, (unsigned int)val.b,
+		      (unsigned long long)val.c,
+		      (unsigned int)tests[i].val.a, (unsigned int)tests[i].val.b,
+		      (unsigned long long)tests[i].val.c);
+	}
+}
+
+/*
+ * Verify that the internal "test_glo.rodata" map rejects updates.
+ *
+ * @obj:      loaded BPF object that owns the map
+ * @duration: not referenced by name in this function; presumably consumed
+ *            by the CHECK() macro (TODO confirm against test_progs.h)
+ *
+ * A scratch value of the map's value_size is written to key 0; the test
+ * passes only when the update fails with errno == EPERM.  A missing or
+ * non-internal map, or an invalid fd, bumps error_cnt and returns early.
+ */
+static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
+{
+	struct bpf_map *rodata;
+	__u8 *scratch;
+	int key = 0;
+	int fd, rc;
+
+	rodata = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+	if (!rodata || !bpf_map__is_internal(rodata))
+		goto fail;
+
+	fd = bpf_map__fd(rodata);
+	if (fd < 0)
+		goto fail;
+
+	scratch = malloc(bpf_map__def(rodata)->value_size);
+	/* Without a buffer the update is not attempted and -ENOMEM is reported. */
+	rc = scratch ? bpf_map_update_elem(fd, &key, scratch, 0) : -ENOMEM;
+	free(scratch);
+
+	CHECK(!rc || errno != EPERM, "test .rodata read-only map",
+	      "err %d errno %d\n", rc, errno);
+	return;
+
+fail:
+	error_cnt++;
+}
+
+/*
+ * Entry point of the global data test: load ./test_global_data.o as a
+ * SCHED_CLS program, run it once on pkt_v4 so that it fills the result
+ * maps, then run every sub-check against the loaded object.
+ */
+void test_global_data(void)
+{
+	static void (* const checks[])(struct bpf_object *, __u32) = {
+		test_global_data_number,
+		test_global_data_string,
+		test_global_data_struct,
+		test_global_data_rdonly,
+	};
+	const char *file = "./test_global_data.o";
+	struct bpf_object *obj;
+	__u32 duration = 0;
+	__u32 retval;
+	int prog_fd;
+	size_t idx;
+	int err;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+		return;
+
+	/* A single run is enough; the program only copies data into maps. */
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "pass global data run",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+
+	/* The sub-checks still run when the test run above failed. */
+	for (idx = 0; idx < sizeof(checks) / sizeof(checks[0]); idx++)
+		checks[idx](obj, duration);
+
+	bpf_object__close(obj);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+/*
+ * Exercise the ctx_in/ctx_out arguments of a BPF test run with a
+ * struct __sk_buff context.
+ *
+ * The test first checks that inconsistent or unsupported context input is
+ * rejected (a non-NULL ctx pointer with size 0, non-zero len, tc_index,
+ * hash or sk).  It then performs one valid run and verifies that the
+ * program's changes to cb[] and priority are copied back into @skb.
+ * The loaded object is released before returning.
+ */
+void test_skb_ctx(void)
+{
+	struct __sk_buff skb = {
+		.cb[0] = 1,
+		.cb[1] = 2,
+		.cb[2] = 3,
+		.cb[3] = 4,
+		.cb[4] = 5,
+		.priority = 6,
+	};
+	struct bpf_prog_test_run_attr tattr = {
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.ctx_in = &skb,
+		.ctx_size_in = sizeof(skb),
+		.ctx_out = &skb,
+		.ctx_size_out = sizeof(skb),
+	};
+	struct bpf_object *obj;
+	int err;
+	int i;
+
+	err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+			    &tattr.prog_fd);
+	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+		return;
+
+	/* ctx_in != NULL, ctx_size_in == 0 */
+
+	tattr.ctx_size_in = 0;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+	tattr.ctx_size_in = sizeof(skb);
+
+	/* ctx_out != NULL, ctx_size_out == 0 */
+
+	tattr.ctx_size_out = 0;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+	tattr.ctx_size_out = sizeof(skb);
+
+	/* non-zero [len, tc_index] fields should be rejected */
+
+	skb.len = 1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+	skb.len = 0;
+
+	skb.tc_index = 1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+	skb.tc_index = 0;
+
+	/* non-zero [hash, sk] fields should be rejected */
+
+	skb.hash = 1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+	skb.hash = 0;
+
+	skb.sk = (struct bpf_sock *)1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+	skb.sk = 0;
+
+	/* Valid context: the run must succeed and return 0. */
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err != 0 || tattr.retval,
+		   "run",
+		   "err %d errno %d retval %d\n",
+		   err, errno, tattr.retval);
+
+	CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
+		   "ctx_size_out",
+		   "incorrect output size, want %lu have %u\n",
+		   sizeof(skb), tattr.ctx_size_out);
+
+	/* cb[i] started as i + 1; the copied-back value must be i + 2. */
+	for (i = 0; i < 5; i++)
+		CHECK_ATTR(skb.cb[i] != i + 2,
+			   "ctx_out_cb",
+			   "skb->cb[%d] == %d, expected %d\n",
+			   i, skb.cb[i], i + 2);
+	CHECK_ATTR(skb.priority != 7,
+		   "ctx_out_priority",
+		   "skb->priority == %d, expected %d\n",
+		   skb.priority, 7);
+
+	/* Release the object loaded above; it was previously leaked. */
+	bpf_object__close(obj);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Isovalent, Inc.
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+/*
+ * Array map with 11 __u64 slots.  load_static_data() stores the numeric
+ * relocation results under keys 0..10.
+ */
+struct bpf_map_def SEC("maps") result_number = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 11,
+};
+
+/*
+ * Array map with five 32-byte slots.  load_static_data() stores the string
+ * relocation results under keys 0..4; 32 matches the size of str0/str1/str2.
+ */
+struct bpf_map_def SEC("maps") result_string = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = 32,
+ .max_entries = 5,
+};
+
+/* Value type of the result_struct map and of the struct0..struct3 objects. */
+struct foo {
+ __u8 a;
+ __u32 b;
+ __u64 c;
+};
+
+/*
+ * Array map of struct foo values.  It has room for five entries;
+ * load_static_data() writes keys 0..3 only.
+ */
+struct bpf_map_def SEC("maps") result_struct = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct foo),
+ .max_entries = 5,
+};
+
+/*
+ * Relocation tests for __u64s.
+ * NOTE(review): the mix of uninitialised, zero-initialised, initialised and
+ * const objects presumably spreads these over the .bss, .data and .rodata
+ * sections - confirm against the compiler's actual section placement.
+ */
+static __u64 num0;
+static __u64 num1 = 42;
+static const __u64 num2 = 24;
+static __u64 num3 = 0;
+static __u64 num4 = 0xffeeff;
+static const __u64 num5 = 0xabab;
+static const __u64 num6 = 0xab;
+
+/* Relocation tests for strings; all buffers are 32 bytes, the map value size. */
+static const char str0[32] = "abcdefghijklmnopqrstuvwxyz";
+static char str1[32] = "abcdefghijklmnopqrstuvwxyz";
+static char str2[32];
+
+/* Relocation tests for structs: const and non-const, with and without data. */
+static const struct foo struct0 = {
+ .a = 42,
+ .b = 0xfefeefef,
+ .c = 0x1111111111111111ULL,
+};
+static struct foo struct1;
+static const struct foo struct2;
+static struct foo struct3 = {
+ .a = 41,
+ .b = 0xeeeeefef,
+ .c = 0x2111111111111111ULL,
+};
+
+/*
+ * Store the object that @var points to into map result_<map> under key @num.
+ * The return value of bpf_map_update_elem() is ignored.
+ */
+#define test_reloc(map, num, var) \
+ do { \
+ __u32 key = num; \
+ bpf_map_update_elem(&result_##map, &key, var, 0); \
+ } while (0)
+
+/*
+ * Copy every static test object into the result maps so that user space can
+ * read the values back.  Some objects are stored twice, before and after a
+ * runtime modification, so the order of the statements matters.
+ * Always returns TC_ACT_OK.
+ */
+SEC("static_data_load")
+int load_static_data(struct __sk_buff *skb)
+{
+ /* Function-local static constant, stored under number key 10. */
+ static const __u64 bar = ~0;
+
+ test_reloc(number, 0, &num0);
+ test_reloc(number, 1, &num1);
+ test_reloc(number, 2, &num2);
+ test_reloc(number, 3, &num3);
+ test_reloc(number, 4, &num4);
+ test_reloc(number, 5, &num5);
+ /* Overwrite num4 at run time and store the new value under key 6. */
+ num4 = 1234;
+ test_reloc(number, 6, &num4);
+ /* num0 is stored a second time, now under key 7. */
+ test_reloc(number, 7, &num0);
+ test_reloc(number, 8, &num6);
+
+ test_reloc(string, 0, str0);
+ test_reloc(string, 1, str1);
+ test_reloc(string, 2, str2);
+ /* Patch one character of str1, then store the modified copy. */
+ str1[5] = 'x';
+ test_reloc(string, 3, str1);
+ /* Write "hello" plus its NUL at offset 2; str2[0] and str2[1] are untouched. */
+ __builtin_memcpy(&str2[2], "hello", sizeof("hello"));
+ test_reloc(string, 4, str2);
+
+ test_reloc(struct, 0, &struct0);
+ test_reloc(struct, 1, &struct1);
+ test_reloc(struct, 2, &struct2);
+ test_reloc(struct, 3, &struct3);
+
+ /* Address of a member inside a struct, stored as a number. */
+ test_reloc(number, 9, &struct0.c);
+ test_reloc(number, 10, &bar);
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+typedef unsigned int u32;
+
+/*
+ * Rotate @word left by @shift bits.  @shift is expected to be in 0..31;
+ * every use in this header passes a constant in that range.
+ */
+static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
+{
+	const unsigned int back = (-shift) & 31;
+	const u32 upper = word << shift;
+	const u32 lower = word >> back;
+
+	return upper | lower;
+}
+
+/*
+ * Mix three u32 state words in place.  The arguments are assigned to and
+ * evaluated several times, so they must be plain lvalues without side
+ * effects.  The expansion is a bare brace block, not a do/while(0).
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/*
+ * Final mixing step of the three u32 state words; the result is left in c.
+ * As with __jhash_mix(), the arguments must be side-effect-free lvalues.
+ */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+/*
+ * Hash @length bytes starting at @key, seeded with @initval.
+ *
+ * ATTR is not defined in this header; the including file has to define it
+ * (for example to an inlining attribute) before the #include.
+ *
+ * Returns the u32 hash.  For a remaining length of 0 the switch adds nothing
+ * and c is returned without the final mixing step.
+ */
+static ATTR
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ /*
+  * Consume 12 bytes per round while more than 12 remain.
+  * NOTE(review): the volatile casts presumably keep the compiler from
+  * merging or reordering the three loads - confirm the intent.
+  */
+ while (length > 12) {
+ a += *(volatile u32 *)(k);
+ b += *(volatile u32 *)(k + 4);
+ c += *(volatile u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /*
+  * Tail of 0..12 bytes: every case deliberately falls through to the next
+  * one, so case N adds bytes N-1 down to 0 before the final mix.
+  */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ c ^= a;
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/*
+ * Check and modify the context handed in by the test runner.
+ *
+ * Expects skb->cb[i] == i + 1 for i in 0..4 and returns 1 on the first
+ * mismatch.  Otherwise every cb[] entry and skb->priority are incremented
+ * by one and 0 is returned.
+ */
+SEC("skb_ctx")
+int process(struct __sk_buff *skb)
+{
+ /* The loop is fully unrolled at compile time. */
+ #pragma clang loop unroll(full)
+ for (int i = 0; i < 5; i++) {
+ if (skb->cb[i] != i + 1)
+ return 1;
+ skb->cb[i]++;
+ }
+ skb->priority++;
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+
+/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
+#define MAX_ULONG_STR_LEN 0xF
+
+/* Max supported length of sysctl value string (pow2). */
+#define MAX_VALUE_STR_LEN 0x40
+
+/*
+ * Return 1 when the sysctl being accessed is "net/ipv4/tcp_mem", 0 otherwise
+ * (including when the name cannot be read).
+ */
+static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+ char tcp_mem_name[] = "net/ipv4/tcp_mem";
+ unsigned char i;
+ char name[64];
+ int ret;
+
+ memset(name, 0, sizeof(name));
+ ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+ /* The value returned for the name must equal the expected length, NUL excluded. */
+ if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+ return 0;
+
+ /* Compare byte by byte, terminating NUL included; name was zeroed above. */
+#pragma clang loop unroll(full)
+ for (i = 0; i < sizeof(tcp_mem_name); ++i)
+ if (name[i] != tcp_mem_name[i])
+ return 0;
+
+ return 1;
+}
+
+/*
+ * cgroup/sysctl program for reads of net/ipv4/tcp_mem.
+ *
+ * Returns 0 for writes, for any other sysctl, and when the current value
+ * cannot be fetched or parsed.  Otherwise it parses three unsigned numbers
+ * from the current value and returns 1 only if they are strictly increasing.
+ * NOTE(review): the meaning of the 0/1 verdict is defined by the
+ * cgroup/sysctl hook, not by this file - confirm against the hook docs.
+ */
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+ unsigned long tcp_mem[3] = {0, 0, 0};
+ char value[MAX_VALUE_STR_LEN];
+ unsigned char i, off = 0;
+ int ret;
+
+ if (ctx->write)
+ return 0;
+
+ if (!is_tcp_mem(ctx))
+ return 0;
+
+ ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+ if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+ return 0;
+
+ /* Parse the three numbers back to back; off tracks how far we have read. */
+#pragma clang loop unroll(full)
+ for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+ ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+ tcp_mem + i);
+ if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+ return 0;
+ /*
+  * ret is already range-checked; NOTE(review): the mask presumably
+  * makes the bound explicit for the BPF verifier - confirm.
+  */
+ off += ret & MAX_ULONG_STR_LEN;
+ }
+
+
+ return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/mpls.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>
static const int cfg_port = 8000;
-struct grev4hdr {
- struct iphdr ip;
+static const int cfg_udp_src = 20000;
+
+#define UDP_PORT 5555
+#define MPLS_OVER_UDP_PORT 6635
+#define ETH_OVER_UDP_PORT 7777
+
+/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
+static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
+ MPLS_LS_S_MASK | 0xff);
+
+struct gre_hdr {
__be16 flags;
__be16 protocol;
} __attribute__((packed));
-struct grev6hdr {
+/* Outer L4 header: either a UDP header or a GRE header, sharing one storage area. */
+union l4hdr {
+ struct udphdr udp;
+ struct gre_hdr gre;
+};
+
+/*
+ * Buffer for the outer IPv4 encapsulation headers: IPv4 header, optional
+ * L4 header, then spare bytes for an optional L2 header (MPLS label or
+ * Ethernet header).  The L2 bytes are written right after the L4 header
+ * that is actually used, which can be before the start of pad[].
+ */
+struct v4hdr {
+ struct iphdr ip;
+ union l4hdr l4hdr;
+ __u8 pad[16]; /* enough space for L2 header */
+} __attribute__((packed));
+
+struct v6hdr {
struct ipv6hdr ip;
- __be16 flags;
- __be16 protocol;
+ union l4hdr l4hdr;
+ __u8 pad[16]; /* enough space for L2 header */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
{
- struct grev4hdr h_outer;
+ __u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
+ struct v4hdr h_outer;
struct tcphdr tcph;
+ int olen, l2_len;
__u64 flags;
- int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
+ olen = sizeof(h_outer.ip);
+ l2_len = 0;
+
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
- if (with_gre) {
+
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ l2_len = sizeof(mpls_label);
+ udp_dst = MPLS_OVER_UDP_PORT;
+ break;
+ case ETH_P_TEB:
+ l2_len = ETH_HLEN;
+ udp_dst = ETH_OVER_UDP_PORT;
+ break;
+ }
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
+ switch (encap_proto) {
+ case IPPROTO_GRE:
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
- olen = sizeof(h_outer);
- } else {
- olen = sizeof(h_outer.ip);
+ olen += sizeof(h_outer.l4hdr.gre);
+ h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
+ h_outer.l4hdr.gre.flags = 0;
+ break;
+ case IPPROTO_UDP:
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+ olen += sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+ h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
+ h_outer.l4hdr.udp.check = 0;
+ h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
+ sizeof(h_outer.l4hdr.udp) +
+ l2_len);
+ break;
+ case IPPROTO_IPIP:
+ break;
+ default:
+ return TC_ACT_OK;
+ }
+
+ /* add L2 encap (if specified) */
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ break;
+ case ETH_P_TEB:
+ if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+ ETH_HLEN))
+ return TC_ACT_SHOT;
+ break;
}
+ olen += l2_len;
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
/* prepare new outer network header */
h_outer.ip = iph_inner;
h_outer.ip.tot_len = bpf_htons(olen +
- bpf_htons(h_outer.ip.tot_len));
- if (with_gre) {
- h_outer.ip.protocol = IPPROTO_GRE;
- h_outer.protocol = bpf_htons(ETH_P_IP);
- h_outer.flags = 0;
- } else {
- h_outer.ip.protocol = IPPROTO_IPIP;
- }
+ bpf_ntohs(h_outer.ip.tot_len));
+ h_outer.ip.protocol = encap_proto;
set_ipv4_csum((void *)&h_outer.ip);
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
{
+ __u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
- struct grev6hdr h_outer;
+ struct v6hdr h_outer;
struct tcphdr tcph;
+ int olen, l2_len;
+ __u16 tot_len;
__u64 flags;
- int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
+ olen = sizeof(h_outer.ip);
+ l2_len = 0;
+
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
- if (with_gre) {
+
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ l2_len = sizeof(mpls_label);
+ udp_dst = MPLS_OVER_UDP_PORT;
+ break;
+ case ETH_P_TEB:
+ l2_len = ETH_HLEN;
+ udp_dst = ETH_OVER_UDP_PORT;
+ break;
+ }
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
+ switch (encap_proto) {
+ case IPPROTO_GRE:
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
- olen = sizeof(h_outer);
- } else {
- olen = sizeof(h_outer.ip);
+ olen += sizeof(h_outer.l4hdr.gre);
+ h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
+ h_outer.l4hdr.gre.flags = 0;
+ break;
+ case IPPROTO_UDP:
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+ olen += sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+ h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
+ tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
+ sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.check = 0;
+ h_outer.l4hdr.udp.len = bpf_htons(tot_len);
+ break;
+ case IPPROTO_IPV6:
+ break;
+ default:
+ return TC_ACT_OK;
}
+ /* add L2 encap (if specified) */
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ break;
+ case ETH_P_TEB:
+ if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+ ETH_HLEN))
+ return TC_ACT_SHOT;
+ break;
+ }
+ olen += l2_len;
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
h_outer.ip = iph_inner;
h_outer.ip.payload_len = bpf_htons(olen +
bpf_ntohs(h_outer.ip.payload_len));
- if (with_gre) {
- h_outer.ip.nexthdr = IPPROTO_GRE;
- h_outer.protocol = bpf_htons(ETH_P_IPV6);
- h_outer.flags = 0;
- } else {
- h_outer.ip.nexthdr = IPPROTO_IPV6;
- }
+
+ h_outer.ip.nexthdr = encap_proto;
/* store new outer network header */
if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
return TC_ACT_OK;
}
-SEC("encap_ipip")
-int __encap_ipip(struct __sk_buff *skb)
+SEC("encap_ipip_none")
+int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
- return encap_ipv4(skb, false);
+ return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
else
return TC_ACT_OK;
}
-SEC("encap_gre")
-int __encap_gre(struct __sk_buff *skb)
+SEC("encap_gre_none")
+int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
- return encap_ipv4(skb, true);
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
else
return TC_ACT_OK;
}
-SEC("encap_ip6tnl")
-int __encap_ip6tnl(struct __sk_buff *skb)
+/* IPv4 packets: encapsulate in GRE with an MPLS label; anything else passes. */
+SEC("encap_gre_mpls")
+int __encap_gre_mpls(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+}
+
+/* IPv4 packets: encapsulate in GRE with an inner Ethernet header; anything else passes. */
+SEC("encap_gre_eth")
+int __encap_gre_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
+}
+
+/* IPv4 packets: encapsulate in UDP without an L2 header; anything else passes. */
+SEC("encap_udp_none")
+int __encap_udp_none(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
+}
+
+/* IPv4 packets: encapsulate in UDP with an MPLS label; anything else passes. */
+SEC("encap_udp_mpls")
+int __encap_udp_mpls(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+}
+
+/* IPv4 packets: encapsulate in UDP with an inner Ethernet header; anything else passes. */
+SEC("encap_udp_eth")
+int __encap_udp_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
+}
+
+SEC("encap_ip6tnl_none")
+int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
- return encap_ipv6(skb, false);
+ return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
else
return TC_ACT_OK;
}
-SEC("encap_ip6gre")
-int __encap_ip6gre(struct __sk_buff *skb)
+SEC("encap_ip6gre_none")
+int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
- return encap_ipv6(skb, true);
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
+ else
+ return TC_ACT_OK;
+}
+
+/* IPv6 packets: encapsulate in GRE with an MPLS label; anything else passes. */
+SEC("encap_ip6gre_mpls")
+int __encap_ip6gre_mpls(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+}
+
+/* IPv6 packets: encapsulate in GRE with an inner Ethernet header; anything else passes. */
+SEC("encap_ip6gre_eth")
+int __encap_ip6gre_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
+}
+
+/* IPv6 packets: encapsulate in UDP without an L2 header; anything else passes. */
+SEC("encap_ip6udp_none")
+int __encap_ip6udp_none(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
+}
+
+/* IPv6 packets: encapsulate in UDP with an MPLS label; anything else passes. */
+SEC("encap_ip6udp_mpls")
+int __encap_ip6udp_mpls(struct __sk_buff *skb)
+{
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+}
+
+SEC("encap_ip6udp_eth")
+int __encap_ip6udp_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
else
return TC_ACT_OK;
}
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct grev6hdr)];
- int olen;
+ char buf[sizeof(struct v6hdr)];
+ struct gre_hdr greh;
+ struct udphdr udph;
+ int olen = len;
switch (proto) {
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- olen = len;
break;
case IPPROTO_GRE:
- olen = len + 4 /* gre hdr */;
+ olen += sizeof(struct gre_hdr);
+ if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
+ return TC_ACT_OK;
+ switch (bpf_ntohs(greh.protocol)) {
+ case ETH_P_MPLS_UC:
+ olen += sizeof(mpls_label);
+ break;
+ case ETH_P_TEB:
+ olen += ETH_HLEN;
+ break;
+ }
+ break;
+ case IPPROTO_UDP:
+ olen += sizeof(struct udphdr);
+ if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
+ return TC_ACT_OK;
+ switch (bpf_ntohs(udph.dest)) {
+ case MPLS_OVER_UDP_PORT:
+ olen += sizeof(mpls_label);
+ break;
+ case ETH_OVER_UDP_PORT:
+ olen += ETH_HLEN;
+ break;
+ }
break;
default:
return TC_ACT_OK;
// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <string.h>
+#include <netinet/in.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
-#include <netinet/in.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#include "test_tcpbpf.h"
// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <string.h>
+#include <netinet/in.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
-#include <netinet/in.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#include "test_tcpnotify.h"
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((noinline))
+#include "test_jhash.h"
+
+/*
+ * Verifier scale test: 90 consecutive calls to jhash() over 14-byte windows
+ * of the packet, with jhash() built noinline (see ATTR above).
+ * Always returns 0; `ret` is declared but never used.
+ */
+SEC("scale90_noinline")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+
+ /*
+  * One step: bounds-check the window at data + i and, if it fits, hash it
+  * into tc_index and advance i.  The `break` only leaves this step's own
+  * do/while(0); the following expansions still execute.
+  */
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((always_inline))
+#include "test_jhash.h"
+
+/*
+ * Verifier scale test: 90 consecutive calls to jhash() over 14-byte windows
+ * of the packet, with jhash() built always_inline (see ATTR above).
+ * Always returns 0; `ret` is declared but never used.
+ */
+SEC("scale90_inline")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+
+ /*
+  * One step: bounds-check the window at data + i and, if it fits, hash it
+  * into tc_index and advance i.  The `break` only leaves this step's own
+  * do/while(0); the following expansions still execute.
+  */
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((noinline))
+#include "test_jhash.h"
+
+/*
+ * Verifier scale test: 90 consecutive calls to jhash() over 32-byte windows
+ * of the packet, with jhash() built noinline (see ATTR above).
+ * Always returns 0; `ret` is declared but never used.
+ */
+SEC("scale90_noinline32")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ /* 32 bytes make jhash() run its 12-byte loop before the tail switch. */
+ nh_off = 32;
+
+ /* pragma unroll doesn't work on large loops */
+
+ /*
+  * One step: bounds-check the window at data + i and, if it fits, hash it
+  * into tc_index and advance i.  The `break` only leaves this step's own
+  * do/while(0); the following expansions still execute.
+  */
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
#define BTF_UNION_ENC(name, nr_elems, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz)
+/*
+ * Encode a BTF_KIND_VAR record: the common type header (name, kind info,
+ * referenced type id) followed by one extra word holding the linkage.
+ */
+#define BTF_VAR_ENC(name, type, linkage) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), type), (linkage)
+/* Encode one section-info entry as three words: type id, offset, size. */
+#define BTF_VAR_SECINFO_ENC(type, offset, size) \
+ (type), (offset), (size)
+
#define BTF_MEMBER_ENC(name, type, bits_offset) \
(name), (type), (bits_offset)
#define BTF_ENUM_ENC(name, val) (name), (val)
.value_type_id = 3,
.max_entries = 4,
},
-
{
.descr = "struct test #3 Invalid member offset",
.raw_types = {
.btf_load_err = true,
.err_str = "Invalid member bits_offset",
},
-
+/*
+ * struct A {
+ * unsigned long long m;
+ * int n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * };
+ */
+{
+ .descr = "global data test #1",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test1_map",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 1,
+ .value_type_id = 5,
+ .max_entries = 4,
+},
+/*
+ * struct A {
+ * unsigned long long m;
+ * int n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * };
+ * static struct A t; <- in .bss
+ */
+{
+ .descr = "global data test #2",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48),
+ BTF_VAR_SECINFO_ENC(6, 0, 48),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+},
+{
+ .descr = "global data test #3",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0t\0.bss",
+ .str_sec_size = sizeof("\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 3,
+ .max_entries = 1,
+},
+{
+ .descr = "global data test #4, unsupported linkage",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* int t with linkage 2 (the other tests use linkage 0) */
+ BTF_VAR_ENC(NAME_TBD, 1, 2), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0t\0.bss",
+ .str_sec_size = sizeof("\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Linkage not supported",
+},
+{
+ .descr = "global data test #5, invalid var type",
+ .raw_types = {
+ /* static void t */
+ BTF_VAR_ENC(NAME_TBD, 0, 0), /* [1] */
+ /* .bss section */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(1, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0t\0.bss",
+ .str_sec_size = sizeof("\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #6, invalid var type (fwd type)",
+ .raw_types = {
+ /* union A */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ /* static union A t */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type",
+},
+{
+ .descr = "global data test #7, invalid var type (fwd type)",
+ .raw_types = {
+ /* union A */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ /* static union A t */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section: member points at FWD [1], not VAR [2] */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(1, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type",
+},
+{
+ .descr = "global data test #8, invalid var size",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section: member size 47 != 48-byte struct A */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48),
+ BTF_VAR_SECINFO_ENC(6, 0, 47),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid size",
+},
+{
+ .descr = "global data test #9, invalid var size",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section: section size 46 < 48-byte member */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46),
+ BTF_VAR_SECINFO_ENC(6, 0, 48),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid size",
+},
+{
+ .descr = "global data test #10, invalid var size",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section: section and member say 46, struct A is 48 */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46),
+ BTF_VAR_SECINFO_ENC(6, 0, 46),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid size",
+},
+{
+ .descr = "global data test #11, multiple section members",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_VAR_SECINFO_ENC(7, 58, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+},
+{
+ .descr = "global data test #12, invalid offset",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section: u at offset 60 + size 4 exceeds section size 62 */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_VAR_SECINFO_ENC(7, 60, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid offset+size",
+},
+{
+ .descr = "global data test #13, invalid offset",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section: u at offset 12 falls inside t (offset 10, size 48) */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_VAR_SECINFO_ENC(7, 12, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid offset",
+},
+{
+ .descr = "global data test #14, invalid offset",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section: members listed in descending offset order */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(7, 58, 4),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid offset",
+},
+{
+ .descr = "global data test #15, not var kind",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(1, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Not a VAR kind member",
+},
+{
+ .descr = "global data test #16, invalid var referencing sec",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */
+ /* a section */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(3, 0, 4),
+ /* a section */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(6, 0, 4),
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [6] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #17, invalid var referencing var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */
+ /* a section */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(3, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #18, invalid var loop",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0aaa",
+ .str_sec_size = sizeof("\0A\0t\0aaa"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #19, invalid var referencing var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 3, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #20, invalid ptr referencing var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* PTR type_id=3 */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #21, var included in struct",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* VAR type_id=3; */
+ /* } */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid member",
+},
+{
+ .descr = "global data test #22, array of var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
/* Test member exceeds the size of struct.
*
* struct A {
} aenum;
uint32_t ui32b;
uint32_t bits2c:2;
+ uint8_t si8_4[2][2];
};
#ifdef __SIZEOF_INT128__
BTF_ENUM_ENC(NAME_TBD, 2),
BTF_ENUM_ENC(NAME_TBD, 3),
/* struct pprint_mapv */ /* [16] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 10), 40),
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 11), 40),
BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */
BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */
BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */
BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */
BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */
BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */
+ BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */
+ BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"),
+ BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"),
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
BTF_ENUM_ENC(NAME_TBD, 2),
BTF_ENUM_ENC(NAME_TBD, 3),
/* struct pprint_mapv */ /* [16] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40),
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40),
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */
BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */
BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */
BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */
BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */
+ BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */
+ BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"),
+ BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"),
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
BTF_ENUM_ENC(NAME_TBD, 2),
BTF_ENUM_ENC(NAME_TBD, 3),
/* struct pprint_mapv */ /* [16] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40),
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40),
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */
BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */
BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */
BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */
BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */
+ BTF_MEMBER_ENC(NAME_TBD, 20, BTF_MEMBER_OFFSET(0, 264)), /* si8_4 */
/* typedef unsigned int ___int */ /* [17] */
BTF_TYPEDEF_ENC(NAME_TBD, 18),
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */
+ BTF_TYPE_ARRAY_ENC(21, 1, 2), /* [20] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [21] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int"),
+ BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int\0si8_4"),
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
v->aenum = i & 0x03;
v->ui32b = 4;
v->bits2c = 1;
+ v->si8_4[0][0] = (cpu + i) & 0xff;
+ v->si8_4[0][1] = (cpu + i + 1) & 0xff;
+ v->si8_4[1][0] = (cpu + i + 2) & 0xff;
+ v->si8_4[1][1] = (cpu + i + 3) & 0xff;
v = (void *)v + rounded_value_size;
}
}
nexpected_line = snprintf(expected_line, line_size,
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
"{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
- "%u,0x%x}\n",
+ "%u,0x%x,[[%d,%d],[%d,%d]]}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
v->ui32, v->si32,
v->ui8a[6], v->ui8a[7],
pprint_enum_str[v->aenum],
v->ui32b,
- v->bits2c);
+ v->bits2c,
+ v->si8_4[0][0], v->si8_4[0][1],
+ v->si8_4[1][0], v->si8_4[1][1]);
}
#ifdef __SIZEOF_INT128__
.dont_resolve_fwds = false,
},
},
+/*
+ * The input holds two identical "int" types ([1] and [4]), each with its own
+ * VAR "t" and DATASEC ".bss". The expected output keeps a single int [1],
+ * while both VARs and both DATASECs remain as separate types; their type ids
+ * are renumbered to account for the removed duplicate int.
+ */
+{
+ .descr = "dedup: datasec and vars pass-through",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ /* int, referenced from [5] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */
+ /* another static int t */
+ BTF_VAR_ENC(NAME_NTH(2), 4, 0), /* [5] */
+ /* another .bss section */ /* [6] */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(5, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0.bss\0t"),
+ },
+ .expect = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ /* another static int t, now referring to the surviving int [1] */
+ BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [4] */
+ /* another .bss section */ /* [5] */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(4, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0.bss\0t"),
+ },
+ .opts = {
+ .dont_resolve_fwds = false,
+ .dedup_table_size = 1
+ },
+},
};
return base_size + vlen * sizeof(struct btf_member);
case BTF_KIND_FUNC_PROTO:
return base_size + vlen * sizeof(struct btf_param);
+ case BTF_KIND_VAR:
+ return base_size + sizeof(struct btf_var);
+ case BTF_KIND_DATASEC:
+ return base_size + vlen * sizeof(struct btf_var_secinfo);
default:
fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
return -EINVAL;
ip link set veth7 netns ${NS2}
ip link set veth8 netns ${NS3}
+ if [ ! -z "${VRF}" ] ; then
+ ip -netns ${NS1} link add red type vrf table 1001
+ ip -netns ${NS1} link set red up
+ ip -netns ${NS1} route add table 1001 unreachable default metric 8192
+ ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
+ ip -netns ${NS1} link set veth1 vrf red
+ ip -netns ${NS1} link set veth5 vrf red
+
+ ip -netns ${NS2} link add red type vrf table 1001
+ ip -netns ${NS2} link set red up
+ ip -netns ${NS2} route add table 1001 unreachable default metric 8192
+ ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
+ ip -netns ${NS2} link set veth2 vrf red
+ ip -netns ${NS2} link set veth3 vrf red
+ ip -netns ${NS2} link set veth6 vrf red
+ ip -netns ${NS2} link set veth7 vrf red
+ fi
+
# configure addresses: the top route (1-2-3-4)
ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
# NS1
# top route
- ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1
- ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default
- ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1
- ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default
+ ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF}
+ ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default
+ ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
+ ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default
# bottom route
- ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5
- ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6}
- ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6}
- ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5
- ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6}
- ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6}
+ ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF}
+ ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF}
+ ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF}
+ ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
+ ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
+ ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
# NS2
# top route
- ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2
- ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3
- ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2
- ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3
+ ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF}
+ ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
# bottom route
- ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6
- ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7
- ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6
- ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7
+ ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF}
+ ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
# NS3
# top route
ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
ip -netns ${NS3} link set gre_dev up
ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
- ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
- ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
+ ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
+ ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
# configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
ip -netns ${NS3} link set gre6_dev up
ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
- ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6}
- ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8}
+ ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
# rp_filter gets confused by what these tests are doing, so disable it
ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
remove_routes_to_gredev()
{
- ip -netns ${NS1} route del ${IPv4_GRE} dev veth5
- ip -netns ${NS2} route del ${IPv4_GRE} dev veth7
- ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5
- ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7
+ ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
+ ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
+ ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
+ ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
}
add_unreachable_routes_to_gredev()
{
- ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32
- ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32
- ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128
- ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128
+ ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
+ ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
+ ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
+ ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
}
test_ping()
local RET=0
if [ "${PROTO}" == "IPv4" ] ; then
- ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null
+ ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
RET=$?
elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null
+ ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
RET=$?
else
echo " test_ping: unknown PROTO: ${PROTO}"
test_egress()
{
local readonly ENCAP=$1
- echo "starting egress ${ENCAP} encap test"
+ echo "starting egress ${ENCAP} encap test ${VRF}"
setup
# by default, pings work
test_ping IPv6 0
# remove NS2->DST routes, ping fails
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
+ ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
+ ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
test_ping IPv4 1
test_ping IPv6 1
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
+ ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
+ test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
+ ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
+ test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
+ ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
+ test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
+ ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
+ test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
else
echo " unknown encap ${ENCAP}"
TEST_STATUS=1
fi
test_ping IPv4 0
test_ping IPv6 0
- test_gso IPv4
- test_gso IPv6
+
+ # skip GSO tests with VRF: VRF routing needs properly assigned
+ # source IP/device, which is easy to do with ping and hard with dd/nc.
+ if [ -z "${VRF}" ] ; then
+ test_gso IPv4
+ test_gso IPv6
+ fi
# a negative test: remove routes to GRE devices: ping fails
remove_routes_to_gredev
test_ingress()
{
local readonly ENCAP=$1
- echo "starting ingress ${ENCAP} encap test"
+ echo "starting ingress ${ENCAP} encap test ${VRF}"
setup
# need to wait a bit for IPv6 to autoconf, otherwise
test_ping IPv6 0
# remove NS2->DST routes, pings fail
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
+ ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
+ ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
test_ping IPv4 1
test_ping IPv6 1
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
+ ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
+ test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
+ test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
+ ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
+ test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
+ ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
+ test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
else
echo "FAIL: unknown encap ${ENCAP}"
TEST_STATUS=1
process_test_results
}
+VRF=""
+test_egress IPv4
+test_egress IPv6
+test_ingress IPv4
+test_ingress IPv6
+
+VRF="vrf red"
test_egress IPv4
test_egress IPv6
test_ingress IPv4
#!/usr/bin/python3
# Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (c) 2019 Mellanox Technologies. All rights reserved
#
# This software is licensed under the GNU General License Version 2,
# June 1991 as shown in the file COPYING in the top-level directory of this
from datetime import datetime
import argparse
+import errno
import json
import os
import pprint
import random
+import re
import string
import struct
import subprocess
_, out = cmd('ls ' + path)
for f in out.split():
+ if f == "ports":
+ continue
p = os.path.join(path, f)
if os.path.isfile(p):
_, out = cmd('cat %s/%s' % (path, f))
return dfs
-class NetdevSim:
+class NetdevSimDev:
"""
- Class for netdevsim netdevice and its attributes.
+ Class for netdevsim bus device and its attributes.
"""
- def __init__(self, link=None):
- self.link = link
+ def __init__(self, port_count=1):
+ addr = 0
+ while True:
+ try:
+ with open("/sys/bus/netdevsim/new_device", "w") as f:
+ f.write("%u %u" % (addr, port_count))
+ except OSError as e:
+ if e.errno == errno.ENOSPC:
+ addr += 1
+ continue
+ raise e
+ break
+ self.addr = addr
+
+ # Probing of the netdevsim device may happen from a workqueue,
+ # so wait here until all netdevs appear.
+ self.wait_for_netdevs(port_count)
+
+ ret, out = cmd("udevadm settle", fail=False)
+ if ret:
+ raise Exception("udevadm settle failed")
+ ifnames = self.get_ifnames()
- self.dev = self._netdevsim_create()
devs.append(self)
+ self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
+
+ self.nsims = []
+ for port_index in range(port_count):
+ self.nsims.append(NetdevSim(self, port_index, ifnames[port_index]))
+
+ def get_ifnames(self):
+ ifnames = []
+ listdir = os.listdir("/sys/bus/netdevsim/devices/netdevsim%u/net/" % self.addr)
+ for ifname in listdir:
+ ifnames.append(ifname)
+ ifnames.sort()
+ return ifnames
+
+ def wait_for_netdevs(self, port_count):
+ timeout = 5
+ timeout_start = time.time()
+
+ while True:
+ try:
+ ifnames = self.get_ifnames()
+ except FileNotFoundError as e:
+ ifnames = []
+ if len(ifnames) == port_count:
+ break
+ if time.time() < timeout_start + timeout:
+ continue
+ raise Exception("netdevices did not appear within timeout")
- self.ns = ""
+ def dfs_num_bound_progs(self):
+ path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+ _, progs = cmd('ls %s' % (path))
+ return len(progs.split())
- self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
- self.sdev_dir = self.dfs_dir + '/sdev/'
- self.dfs_refresh()
+ def dfs_get_bound_progs(self, expected):
+ progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs bound, expected %d" %
+ (len(progs), expected))
+ return progs
- def __getitem__(self, key):
- return self.dev[key]
+ def remove(self):
+ with open("/sys/bus/netdevsim/del_device", "w") as f:
+ f.write("%u" % self.addr)
+ devs.remove(self)
- def _netdevsim_create(self):
- link = "" if self.link is None else "link " + self.link.dev['ifname']
- _, old = ip("link show")
- ip("link add sim%d {link} type netdevsim".format(link=link))
- _, new = ip("link show")
+ def remove_nsim(self, nsim):
+ self.nsims.remove(nsim)
+ with open("/sys/bus/netdevsim/devices/netdevsim%u/del_port" % self.addr ,"w") as f:
+ f.write("%u" % nsim.port_index)
- for dev in new:
- f = filter(lambda x: x["ifname"] == dev["ifname"], old)
- if len(list(f)) == 0:
- return dev
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname):
+ # In case udev renamed the netdev to according to new schema,
+ # check if the name matches the port_index.
+ nsimnamere = re.compile("eni\d+np(\d+)")
+ match = nsimnamere.match(ifname)
+ if match and int(match.groups()[0]) != port_index + 1:
+ raise Exception("netdevice name mismatches the expected one")
+
+ self.nsimdev = nsimdev
+ self.port_index = port_index
+ self.ns = ""
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ self.dfs_refresh()
+ _, [self.dev] = ip("link show dev %s" % ifname)
- raise Exception("failed to create netdevsim device")
+ def __getitem__(self, key):
+ return self.dev[key]
def remove(self):
- devs.remove(self)
- ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
+ self.nsimdev.remove_nsim(self)
def dfs_refresh(self):
self.dfs = DebugfsDir(self.dfs_dir)
_, data = cmd('cat %s' % (path))
return data.strip()
- def dfs_num_bound_progs(self):
- path = os.path.join(self.sdev_dir, "bpf_bound_progs")
- _, progs = cmd('ls %s' % (path))
- return len(progs.split())
-
- def dfs_get_bound_progs(self, expected):
- progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
- if expected is not None:
- if len(progs) != expected:
- fail(True, "%d BPF programs bound, expected %d" %
- (len(progs), expected))
- return progs
-
def wait_for_flush(self, bound=0, total=0, n_retry=20):
for i in range(n_retry):
- nbound = self.dfs_num_bound_progs()
+ nbound = self.nsimdev.dfs_num_bound_progs()
nprogs = len(bpftool_prog_list())
if nbound == bound and nprogs == total:
return
include_stderr=True)
check_no_extack(res, needle)
-def test_multi_prog(sim, obj, modename, modeid):
+def test_multi_prog(simdev, sim, obj, modename, modeid):
start_test("Test multi-attachment XDP - %s + offload..." %
(modename or "default", ))
sim.set_xdp(obj, "offload")
check_multi_basic(two_xdps)
start_test("Test multi-attachment XDP - device remove...")
- sim.remove()
+ simdev.remove()
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
- return sim
+ return [simdev, sim]
# Parse command line
parser = argparse.ArgumentParser()
bytecode = bpf_bytecode("1,6 0 0 4294967295,")
start_test("Test destruction of generic XDP...")
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.set_xdp(obj, "generic")
- sim.remove()
+ simdev.remove()
bpftool_prog_list_wait(expected=0)
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.tc_add_ingress()
start_test("Test TC non-offloaded...")
start_test("Test TC non-offloaded isn't getting bound...")
ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
fail(ret != 0, "Software TC filter did not load")
- sim.dfs_get_bound_progs(expected=0)
+ simdev.dfs_get_bound_progs(expected=0)
sim.tc_flush_filters()
start_test("Test TC offload by default...")
ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
fail(ret != 0, "Software TC filter did not load")
- sim.dfs_get_bound_progs(expected=0)
+ simdev.dfs_get_bound_progs(expected=0)
ingress = sim.tc_show_ingress(expected=1)
fltr = ingress[0]
fail(not fltr["in_hw"], "Filter not offloaded by default")
start_test("Test TC cBPF bytcode tries offload by default...")
ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
fail(ret != 0, "Software TC filter did not load")
- sim.dfs_get_bound_progs(expected=0)
+ simdev.dfs_get_bound_progs(expected=0)
ingress = sim.tc_show_ingress(expected=1)
fltr = ingress[0]
fail(not fltr["in_hw"], "Bytecode not offloaded by default")
check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test TC offload basics...")
- dfs = sim.dfs_get_bound_progs(expected=1)
+ dfs = simdev.dfs_get_bound_progs(expected=1)
progs = bpftool_prog_list(expected=1)
ingress = sim.tc_show_ingress(expected=1)
start_test("Test destroying device gets rid of TC filters...")
sim.cls_bpf_add_filter(obj, skip_sw=True)
- sim.remove()
+ simdev.remove()
bpftool_prog_list_wait(expected=0)
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
start_test("Test destroying device gets rid of XDP...")
sim.set_xdp(obj, "offload")
- sim.remove()
+ simdev.remove()
bpftool_prog_list_wait(expected=0)
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
start_test("Test XDP prog reporting...")
check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test XDP offload is device bound...")
- dfs = sim.dfs_get_bound_progs(expected=1)
+ dfs = simdev.dfs_get_bound_progs(expected=1)
dprog = dfs[0]
fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
bpftool_prog_list_wait(expected=0)
start_test("Test attempt to use a program for a wrong device...")
- sim2 = NetdevSim()
+ simdev2 = NetdevSimDev()
+ sim2, = simdev2.nsims
sim2.set_xdp(obj, "offload")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a different device accepted")
check_extack_nsim(err, "program bound to different dev.", args)
- sim2.remove()
+ simdev2.remove()
ret, _, err = sim.set_xdp(pinned, "offload",
fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a removed device accepted")
rm(pin_file)
bpftool_prog_list_wait(expected=0)
- sim = test_multi_prog(sim, obj, "", 1)
- sim = test_multi_prog(sim, obj, "drv", 1)
- sim = test_multi_prog(sim, obj, "generic", 2)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "", 1)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "drv", 1)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "generic", 2)
start_test("Test mixing of TC and XDP...")
sim.tc_add_ingress()
start_test("Test if netdev removal waits for translation...")
delay_msec = 500
- sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+ sim.dfs["dev/bpf_bind_verifier_delay"] = delay_msec
start = time.time()
cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
(sim['ifname'], obj)
tc_proc = cmd(cmd_line, background=True, fail=False)
# Wait for the verifier to start
- while sim.dfs_num_bound_progs() <= 2:
+ while simdev.dfs_num_bound_progs() <= 2:
pass
- sim.remove()
+ simdev.remove()
end = time.time()
ret, _ = cmd_result(tc_proc, fail=False)
time_diff = end - start
clean_up()
bpftool_prog_list_wait(expected=0)
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
map_obj = bpf_obj("sample_map_ret0.o")
start_test("Test loading program with maps...")
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
- sim.remove()
+ simdev.remove()
start_test("Test bpftool bound info reporting (removed dev)...")
check_dev_info_removed(prog_file=prog_file, map_file=map_file)
clean_up()
bpftool_prog_list_wait(expected=0)
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
start_test("Test map update (no flags)...")
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
start_test("Test map remove...")
sim.unset_xdp("offload")
bpftool_map_list_wait(expected=0)
- sim.remove()
+ simdev.remove()
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
- sim.remove()
+ simdev.remove()
bpftool_map_list_wait(expected=0)
start_test("Test map creation fail path...")
- sim = NetdevSim()
+ simdev = NetdevSimDev()
+ sim, = simdev.nsims
sim.dfs["bpf_map_accept"] = "N"
ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
fail(ret == 0,
"netdevsim didn't refuse to create a map with offload disabled")
- sim.remove()
+ simdev.remove()
start_test("Test multi-dev ASIC program reuse...")
- simA = NetdevSim()
- simB1 = NetdevSim()
- simB2 = NetdevSim(link=simB1)
- simB3 = NetdevSim(link=simB1)
+ simdevA = NetdevSimDev()
+ simA, = simdevA.nsims
+ simdevB = NetdevSimDev(3)
+ simB1, simB2, simB3 = simdevB.nsims
sims = (simA, simB1, simB2, simB3)
simB = (simB1, simB2, simB3)
progB = bpf_pinned("/sys/fs/bpf/nsimB")
simA.set_xdp(progA, "offload", JSON=False)
- for d in simB:
+ for d in simdevB.nsims:
d.set_xdp(progB, "offload", JSON=False)
start_test("Test multi-dev ASIC cross-dev replace...")
ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
fail(ret == 0, "cross-ASIC program allowed")
- for d in simB:
+ for d in simdevB.nsims:
ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
fail(ret == 0, "cross-ASIC program allowed")
fail=False, include_stderr=True)
fail(ret == 0, "cross-ASIC program allowed")
check_extack_nsim(err, "program bound to different dev.", args)
- for d in simB:
+ for d in simdevB.nsims:
ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
fail=False, include_stderr=True)
fail(ret == 0, "cross-ASIC program allowed")
start_test("Test multi-dev ASIC cross-dev destruction...")
bpftool_prog_list_wait(expected=2)
- simA.remove()
+ simdevA.remove()
bpftool_prog_list_wait(expected=1)
ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
simB3.remove()
+ simdevB.remove()
bpftool_prog_list_wait(expected=0)
start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
int error_cnt, pass_cnt;
bool jit_enabled;
+bool verifier_stats = false;
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
#include <prog_tests/tests.h>
#undef DECLARE
-int main(void)
+int main(int ac, char **av)
{
srand(time(NULL));
jit_enabled = is_jit_enabled();
+ if (ac == 2 && strcmp(av[1], "-s") == 0)
+ verifier_stats = true;
+
#define CALL
#include <prog_tests/tests.h>
#undef CALL
extern int error_cnt, pass_cnt;
extern bool jit_enabled;
+extern bool verifier_stats;
#define MAGIC_BYTES 123
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
{0, BPF_CGROUP_UDP6_SENDMSG},
},
+ {
+ "cgroup/sysctl",
+ {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
+ {0, BPF_CGROUP_SYSCTL},
+ },
};
static int test_prog_type_by_name(const struct sec_name_test *test)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/foo"
+#define MAX_INSNS 512
+#define FIXUP_SYSCTL_VALUE 0
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sysctl_test { /* one cgroup/sysctl BPF test case */
+ const char *descr; /* test name, e.g. "sysctl:read allow all" */
+ size_t fixup_value_insn; /* index into insns[]; presumably the insn whose FIXUP_SYSCTL_VALUE immediate is patched before load -- TODO confirm */
+ struct bpf_insn insns[MAX_INSNS]; /* inline BPF program, up to MAX_INSNS instructions */
+ const char *prog_file; /* NOTE(review): presumably an object file used instead of insns[]; confirm against the loader */
+ enum bpf_attach_type attach_type; /* attach type to use, e.g. BPF_CGROUP_SYSCTL */
+ const char *sysctl; /* sysctl name with '/' separators, e.g. "kernel/ostype" */
+ int open_flags; /* open(2)-style flags such as O_RDONLY or O_WRONLY */
+ const char *newval; /* presumably the value written for O_WRONLY tests -- TODO confirm */
+ const char *oldval; /* presumably the value expected when reading -- TODO confirm */
+ enum {
+ LOAD_REJECT, /* presumably: program load is expected to fail */
+ ATTACH_REJECT, /* presumably: attaching the program is expected to fail */
+ OP_EPERM, /* presumably: the sysctl access is expected to fail with EPERM */
+ SUCCESS, /* presumably: the sysctl access is expected to succeed */
+ } result;
+};
+
+static struct sysctl_test tests[] = {
+ {
+ .descr = "sysctl wrong attach_type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = 0,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = ATTACH_REJECT,
+ },
+ {
+ .descr = "sysctl:read allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl:read deny all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "ctx:write sysctl:read read ok",
+ .insns = {
+ /* If (write) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, write)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "ctx:write sysctl:write read ok",
+ .insns = {
+ /* If (write) */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, write)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/domainname",
+ .open_flags = O_WRONLY,
+ .newval = "(none)", /* same as default, should fail anyway */
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "ctx:write sysctl:read write reject",
+ .insns = {
+ /* write = X */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sysctl, write)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = LOAD_REJECT,
+ },
+ {
+ .descr = "ctx:file_pos sysctl:read read ok",
+ .insns = {
+ /* If (file_pos == X) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, file_pos)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "ctx:file_pos sysctl:read read ok narrow",
+ .insns = {
+ /* If (file_pos == X) */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, file_pos)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "ctx:file_pos sysctl:read write ok",
+ .insns = {
+ /* file_pos = X */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sysctl, file_pos)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .oldval = "nux\n",
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_name sysctl_value:base ok",
+ .insns = {
+ /* sysctl_get_name arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_name arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+
+ /* sysctl_get_name arg4 (flags) */
+ BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),
+
+ /* sysctl_get_name(ctx, buf, buf_len, flags) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
+ /* buf == "tcp_mem\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_name sysctl_value:base E2BIG truncated",
+ .insns = {
+ /* sysctl_get_name arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_name arg3 (buf_len) too small */
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+
+ /* sysctl_get_name arg4 (flags) */
+ BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),
+
+ /* sysctl_get_name(ctx, buf, buf_len, flags) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
+
+ /* buf[0:7] == "tcp_me\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_name sysctl:full ok",
+ .insns = {
+ /* sysctl_get_name arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_name arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 17),
+
+ /* sysctl_get_name arg4 (flags) */
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+
+ /* sysctl_get_name(ctx, buf, buf_len, flags) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),
+
+ /* buf[0:8] == "net/ipv4" && */
+ BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
+
+ /* buf[8:16] == "/tcp_mem" && */
+ BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
+
+ /* buf[16:24] == "\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x0ULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_name sysctl:full E2BIG truncated",
+ .insns = {
+ /* sysctl_get_name arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_name arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 16),
+
+ /* sysctl_get_name arg4 (flags) */
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+
+ /* sysctl_get_name(ctx, buf, buf_len, flags) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),
+
+ /* buf[0:8] == "net/ipv4" && */
+ BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
+
+ /* buf[8:16] == "/tcp_me\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_name sysctl:full E2BIG truncated small",
+ .insns = {
+ /* sysctl_get_name arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_name arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+
+ /* sysctl_get_name arg4 (flags) */
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+
+ /* sysctl_get_name(ctx, buf, buf_len, flags) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
+
+ /* buf[0:8] == "net/ip\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_current_value sysctl:read ok, gt",
+ .insns = {
+ /* sysctl_get_current_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_current_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+
+ /* sysctl_get_current_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
+
+ /* buf[0:7] == "Linux\n\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_current_value sysctl:read ok, eq",
+ .insns = {
+ /* sysctl_get_current_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_current_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+
+ /* sysctl_get_current_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
+
+ /* buf[0:7] == "Linux\n\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_current_value sysctl:read E2BIG truncated",
+ .insns = {
+ /* sysctl_get_current_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_current_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 6),
+
+ /* sysctl_get_current_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
+
+ /* buf[0:6] == "Linux\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/ostype",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_current_value sysctl:read EINVAL",
+ .insns = {
+ /* sysctl_get_current_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_current_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+
+ /* sysctl_get_current_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4),
+
+ /* buf[0:8] is NUL-filled) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */
+ .open_flags = O_RDONLY,
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "sysctl_get_current_value sysctl:write ok",
+ .fixup_value_insn = 6,
+ .insns = {
+ /* sysctl_get_current_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_current_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+
+ /* sysctl_get_current_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6),
+
+ /* buf[0:4] == expected) */
+ BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_WRONLY,
+ .newval = "600", /* same as default, should fail anyway */
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "sysctl_get_new_value sysctl:read EINVAL",
+ .insns = {
+ /* sysctl_get_new_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_new_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+
+ /* sysctl_get_new_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_get_new_value sysctl:write ok",
+ .insns = {
+ /* sysctl_get_new_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_new_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+
+ /* sysctl_get_new_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+
+ /* buf[0:4] == "606\0") */
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_WRONLY,
+ .newval = "606",
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "sysctl_get_new_value sysctl:write ok long",
+ .insns = {
+ /* sysctl_get_new_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_new_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 24),
+
+ /* sysctl_get_new_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),
+
+ /* buf[0:8] == "3000000 " && */
+ BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
+
+ /* buf[8:16] == "4000000 " && */
+ BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
+
+ /* buf[16:24] == "6000000\0") */
+ BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_WRONLY,
+ .newval = "3000000 4000000 6000000",
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "sysctl_get_new_value sysctl:write E2BIG",
+ .insns = {
+ /* sysctl_get_new_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_get_new_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 3),
+
+ /* sysctl_get_new_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4),
+
+ /* buf[0:3] == "60\0") */
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_WRONLY,
+ .newval = "606",
+ .result = OP_EPERM,
+ },
+ {
+ .descr = "sysctl_set_new_value sysctl:read EINVAL",
+ .insns = {
+ /* sysctl_set_new_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_set_new_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 3),
+
+ /* sysctl_set_new_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "sysctl_set_new_value sysctl:write ok",
+ .fixup_value_insn = 2,
+ .insns = {
+ /* sysctl_set_new_value arg2 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+ /* sysctl_set_new_value arg3 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_3, 3),
+
+ /* sysctl_set_new_value(ctx, buf, buf_len) */
+ BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_WRONLY,
+ .newval = "606",
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtoul one number string",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+ /* res == expected) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtoul multi number string",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ /* "600 602\0" */
+ BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18),
+ /* res == expected) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16),
+
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4),
+ /* res == expected) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtoul buf_len = 0, reject",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = LOAD_REJECT,
+ },
+ {
+ "bpf_strtoul supported base, ok",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00373730),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+ /* res == expected) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtoul unsupported base, EINVAL",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 3),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtoul buf with spaces only, EINVAL",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtoul negative number, EINVAL",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* "\n-6\0" */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtol negative number, ok",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* "\n-6\0" */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 10),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+ /* res == expected) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtol hex number, ok",
+ .insns = {
+ /* arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4),
+ /* res == expected) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtol max long",
+ .insns = {
+ /* arg1 (buf) 9223372036854775807 */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+ BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+ BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 19),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+ /* if (ret == expected && */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6),
+ /* res == expected) */
+ BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "bpf_strtol overflow, ERANGE",
+ .insns = {
+ /* arg1 (buf) 9223372036854775808 */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+ BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+ BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 19),
+
+ /* arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+ /* if (ret == expected) */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+ {
+ "C prog: deny all writes",
+ .prog_file = "./test_sysctl_prog.o",
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_WRONLY,
+ .newval = "123 456 789",
+ .result = OP_EPERM,
+ },
+ {
+ "C prog: deny access by name",
+ .prog_file = "./test_sysctl_prog.o",
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/route/mtu_expires",
+ .open_flags = O_RDONLY,
+ .result = OP_EPERM,
+ },
+ {
+ "C prog: read tcp_mem",
+ .prog_file = "./test_sysctl_prog.o",
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "net/ipv4/tcp_mem",
+ .open_flags = O_RDONLY,
+ .result = SUCCESS,
+ },
+};
+
+/*
+ * Return the length of the program stored in fp, found by scanning
+ * backwards from slot MAX_INSNS - 1 for the last instruction whose code
+ * or imm field is non-zero.  The result is never smaller than 1.
+ */
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+	size_t last = MAX_INSNS - 1;
+
+	/* Step back over trailing slots that have both code and imm zero. */
+	while (last > 0 && fp[last].code == 0 && fp[last].imm == 0)
+		--last;
+
+	return last + 1;
+}
+
+/*
+ * Pack the first buf_len bytes of buf into a 32-bit value, buf[0] in the
+ * least significant byte, and store it in prog[insn_num].imm.
+ *
+ * Returns 0 on success, or -1 (after logging) when buf_len is larger than
+ * the four bytes the value can hold.
+ */
+static int fixup_sysctl_value(const char *buf, size_t buf_len,
+			      struct bpf_insn *prog, size_t insn_num)
+{
+	uint32_t value_num = 0;
+	size_t i;
+
+	if (buf_len > sizeof(value_num)) {
+		log_err("Value is too big (%zu) to use in fixup", buf_len);
+		return -1;
+	}
+
+	for (i = 0; i < buf_len; ++i) {
+		/*
+		 * Widen to uint32_t before shifting: shifting a promoted int
+		 * into the sign bit (byte >= 0x80 at i == 3) is undefined.
+		 */
+		value_num |= (uint32_t)(unsigned char)buf[i] << (i * 8);
+	}
+
+	prog[insn_num].imm = value_num;
+
+	return 0;
+}
+
+/*
+ * Load the instruction array embedded in the test case as a
+ * BPF_PROG_TYPE_CGROUP_SYSCTL program.
+ *
+ * When test->fixup_value_insn is non-zero, the sysctl at sysctl_path is
+ * read first and its value is patched into that instruction's immediate.
+ *
+ * Returns the result of bpf_load_program_xattr(), or -1 if reading the
+ * sysctl or applying the fixup failed.  The verifier log is printed on a
+ * load failure unless the test expects LOAD_REJECT.
+ */
+static int load_sysctl_prog_insns(struct sysctl_test *test,
+				  const char *sysctl_path)
+{
+	struct bpf_load_program_attr load_attr;
+	struct bpf_insn *insns = test->insns;
+	int prog_fd;
+
+	memset(&load_attr, 0, sizeof(load_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = probe_prog_length(insns);
+	load_attr.license = "GPL";
+
+	if (test->fixup_value_insn != 0) {
+		char value[128];
+		ssize_t nread;
+		int sysctl_fd;
+
+		sysctl_fd = open(sysctl_path, O_RDONLY | O_CLOEXEC);
+		if (sysctl_fd < 0) {
+			log_err("open(%s) failed", sysctl_path);
+			return -1;
+		}
+
+		nread = read(sysctl_fd, value, sizeof(value));
+		if (nread == -1) {
+			/* Log before close() so errno still describes read(). */
+			log_err("read(%s) failed", sysctl_path);
+			close(sysctl_fd);
+			return -1;
+		}
+		close(sysctl_fd);
+
+		if (fixup_sysctl_value(value, nread, insns,
+				       test->fixup_value_insn) != 0)
+			return -1;
+	}
+
+	prog_fd = bpf_load_program_xattr(&load_attr, bpf_log_buf,
+					 BPF_LOG_BUF_SIZE);
+	if (prog_fd >= 0 || test->result == LOAD_REJECT)
+		return prog_fd;
+
+	log_err(">>> Loading program error.\n"
+		">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
+	return prog_fd;
+}
+
+/*
+ * Load the object file named by test->prog_file as a
+ * BPF_PROG_TYPE_CGROUP_SYSCTL program.
+ *
+ * Returns the program fd on success and -1 on failure; the failure is
+ * logged unless the test expects LOAD_REJECT.
+ */
+static int load_sysctl_prog_file(struct sysctl_test *test)
+{
+	struct bpf_prog_load_attr load_attr;
+	struct bpf_object *obj;
+	int prog_fd;
+
+	memset(&load_attr, 0, sizeof(load_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
+	load_attr.file = test->prog_file;
+
+	if (bpf_prog_load_xattr(&load_attr, &obj, &prog_fd) == 0)
+		return prog_fd;
+
+	if (test->result != LOAD_REJECT)
+		log_err(">>> Loading program (%s) error.\n",
+			test->prog_file);
+	return -1;
+}
+
+/*
+ * Load the program for a test case: from the object file when
+ * test->prog_file is set, otherwise from the embedded instruction array.
+ */
+static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
+{
+	if (test->prog_file)
+		return load_sysctl_prog_file(test);
+
+	return load_sysctl_prog_insns(test, sysctl_path);
+}
+
+/*
+ * Open the sysctl at sysctl_path with test->open_flags and perform the
+ * matching access:
+ *  - O_RDONLY: read the value and, if test->oldval is set, check that the
+ *    value starts with it;
+ *  - O_WRONLY: write test->newval (which must be set).
+ * Any other open_flags value is reported as an error.
+ *
+ * Returns 0 on success and a negative value on failure.  errno as left by
+ * the failing step is preserved across the final close().
+ */
+static int access_sysctl(const char *sysctl_path,
+			 const struct sysctl_test *test)
+{
+	int saved_errno;
+	int err = 0;
+	int fd;
+
+	fd = open(sysctl_path, test->open_flags | O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	if (test->open_flags == O_RDONLY) {
+		char buf[128];
+		ssize_t len;
+
+		/* Leave room for the terminator: read() does not add one. */
+		len = read(fd, buf, sizeof(buf) - 1);
+		if (len == -1)
+			goto err;
+		buf[len] = '\0';
+
+		if (test->oldval &&
+		    strncmp(buf, test->oldval, strlen(test->oldval))) {
+			log_err("Read value %s != %s", buf, test->oldval);
+			goto err;
+		}
+	} else if (test->open_flags == O_WRONLY) {
+		if (!test->newval) {
+			log_err("New value for sysctl is not set");
+			goto err;
+		}
+		if (write(fd, test->newval, strlen(test->newval)) == -1)
+			goto err;
+	} else {
+		log_err("Unexpected sysctl access: neither read nor write");
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	/* Keep the errno of the failed access; close() must not clobber it. */
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+	return err;
+}
+
+/*
+ * Run a single test case against the cgroup cgfd: load the program,
+ * attach it with BPF_F_ALLOW_OVERRIDE, access the sysctl, and compare the
+ * outcome with test->result.  Prints the test description and a
+ * PASS/FAIL verdict.
+ *
+ * Returns 0 when the observed outcome matches the expectation, -1
+ * otherwise.
+ */
+static int run_test_case(int cgfd, struct sysctl_test *test)
+{
+	enum bpf_attach_type atype = test->attach_type;
+	char sysctl_path[128];
+	int progfd = -1;
+	int err = 0;
+
+	printf("Test case: %s .. ", test->descr);
+
+	snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s",
+		 test->sysctl);
+
+	progfd = load_sysctl_prog(test, sysctl_path);
+	if (progfd < 0) {
+		if (test->result == LOAD_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+		if (test->result == ATTACH_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	/*
+	 * Clear errno first so that an EPERM left over from an earlier call
+	 * cannot be mistaken for the result of this access.
+	 */
+	errno = 0;
+	if (access_sysctl(sysctl_path, test) == -1) {
+		if (test->result == OP_EPERM && errno == EPERM)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (test->result != SUCCESS) {
+		/* Load, attach and access all worked, but a failure was expected. */
+		log_err("Unexpected success");
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	if (progfd >= 0) {
+		/* Detaching w/o checking return code: best effort attempt. */
+		bpf_prog_detach(cgfd, atype);
+		close(progfd);
+	}
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
+
+/*
+ * Run every entry of tests[] against the cgroup cgfd and print a summary.
+ *
+ * Returns 0 if all test cases passed, -1 if at least one failed.
+ */
+static int run_tests(int cgfd)
+{
+	int passed = 0;
+	int failed = 0;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(tests); ++idx) {
+		if (run_test_case(cgfd, &tests[idx]) == 0)
+			++passed;
+		else
+			++failed;
+	}
+
+	printf("Summary: %d PASSED, %d FAILED\n", passed, failed);
+
+	return failed != 0 ? -1 : 0;
+}
+
+/*
+ * Entry point: set up the cgroup environment, create and join the test
+ * cgroup, run all test cases, then close the cgroup fd and clean up.
+ *
+ * Returns 0 when every step succeeded and -1 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	/* Assume failure until every step below has succeeded. */
+	int err = -1;
+
+	if (setup_cgroup_environment())
+		goto out;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (cgfd < 0)
+		goto out;
+
+	if (join_cgroup(CG_PATH))
+		goto out;
+
+	if (run_tests(cgfd))
+		goto out;
+
+	err = 0;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}
readonly ns1_v6=fd::1
readonly ns2_v6=fd::2
+# Must match port used by bpf program
+readonly udpport=5555
+# MPLSoverUDP
+readonly mplsudpport=6635
+readonly mplsproto=137
+
readonly infile="$(mktemp)"
readonly outfile="$(mktemp)"
# clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+ ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
+ ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
sleep 1
# no arguments: automated test, run all
if [[ "$#" -eq "0" ]]; then
echo "ipip"
- $0 ipv4 ipip 100
+ $0 ipv4 ipip none 100
echo "ip6ip6"
- $0 ipv6 ip6tnl 100
+ $0 ipv6 ip6tnl none 100
+
+ for mac in none mpls eth ; do
+ echo "ip gre $mac"
+ $0 ipv4 gre $mac 100
+
+ echo "ip6 gre $mac"
+ $0 ipv6 ip6gre $mac 100
+
+ echo "ip gre $mac gso"
+ $0 ipv4 gre $mac 2000
- echo "ip gre"
- $0 ipv4 gre 100
+ echo "ip6 gre $mac gso"
+ $0 ipv6 ip6gre $mac 2000
- echo "ip6 gre"
- $0 ipv6 ip6gre 100
+ echo "ip udp $mac"
+ $0 ipv4 udp $mac 100
- echo "ip gre gso"
- $0 ipv4 gre 2000
+ echo "ip6 udp $mac"
+ $0 ipv6 ip6udp $mac 100
- echo "ip6 gre gso"
- $0 ipv6 ip6gre 2000
+ echo "ip udp $mac gso"
+ $0 ipv4 udp $mac 2000
+
+ echo "ip6 udp $mac gso"
+ $0 ipv6 ip6udp $mac 2000
+ done
echo "OK. All tests passed"
exit 0
fi
-if [[ "$#" -ne "3" ]]; then
+if [[ "$#" -ne "4" ]]; then
echo "Usage: $0"
- echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>"
+ echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
exit 1
fi
"ipv4")
readonly addr1="${ns1_v4}"
readonly addr2="${ns2_v4}"
- readonly netcat_opt=-4
+ readonly ipproto=4
+ readonly netcat_opt=-${ipproto}
+ readonly foumod=fou
+ readonly foutype=ipip
+ readonly fouproto=4
+ readonly fouproto_mpls=${mplsproto}
+ readonly gretaptype=gretap
;;
"ipv6")
readonly addr1="${ns1_v6}"
readonly addr2="${ns2_v6}"
- readonly netcat_opt=-6
+ readonly ipproto=6
+ readonly netcat_opt=-${ipproto}
+ readonly foumod=fou6
+ readonly foutype=ip6tnl
+ readonly fouproto="41 -6"
+ readonly fouproto_mpls="${mplsproto} -6"
+ readonly gretaptype=ip6gretap
;;
*)
echo "unknown arg: $1"
esac
readonly tuntype=$2
-readonly datalen=$3
+readonly mac=$3
+readonly datalen=$4
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
trap cleanup EXIT
ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
ip netns exec "${ns1}" tc filter add dev veth1 egress \
bpf direct-action object-file ./test_tc_tunnel.o \
- section "encap_${tuntype}"
+ section "encap_${tuntype}_${mac}"
echo "test bpf encap without decap (expect failure)"
server_listen
! client_connect
+if [[ "$tuntype" =~ "udp" ]]; then
+ # Set up fou tunnel.
+ ttype="${foutype}"
+ targs="encap fou encap-sport auto encap-dport $udpport"
+ # fou may be a module; allow this to fail.
+ modprobe "${foumod}" ||true
+ if [[ "$mac" == "mpls" ]]; then
+ dport=${mplsudpport}
+ dproto=${fouproto_mpls}
+ tmode="mode any ttl 255"
+ else
+ dport=${udpport}
+ dproto=${fouproto}
+ fi
+ ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
+ targs="encap fou encap-sport auto encap-dport $dport"
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+ ttype=$gretaptype
+else
+ ttype=$tuntype
+ targs=""
+fi
+
# serverside, insert decap module
# server is still running
# client can connect again
-ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
- remote "${addr1}" local "${addr2}"
+ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
+ ${tmode} remote "${addr1}" local "${addr2}" $targs
+
+expect_tun_fail=0
+
+if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
+ # No support for MPLS IPv6 fou tunnel; expect failure.
+ expect_tun_fail=1
+elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
+ # No support for TEB fou tunnel; expect failure.
+ expect_tun_fail=1
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+ # Share ethernet address between tunnel/veth2 so L2 decap works.
+ ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
+ awk '/ether/ { print $2 }')
+ ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
+elif [[ "$mac" == "mpls" ]]; then
+ modprobe mpls_iptunnel ||true
+ modprobe mpls_gso ||true
+ ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
+ ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
+ ip netns exec "${ns2}" ip link set lo up
+ ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
+fi
+
# Because packets are decapped by the tunnel they arrive on testtun0 from
# the IP stack perspective. Ensure reverse path filtering is disabled
# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
# selected as the max of the "all" and device-specific values.
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
ip netns exec "${ns2}" ip link set dev testtun0 up
-echo "test bpf encap with tunnel device decap"
-client_connect
-verify_data
+if [[ "$expect_tun_fail" == 1 ]]; then
+ # This tunnel mode is not supported, so we expect failure.
+ echo "test bpf encap with tunnel device decap (expect failure)"
+ ! client_connect
+else
+ echo "test bpf encap with tunnel device decap"
+ client_connect
+ verify_data
+ server_listen
+fi
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
ip netns exec "${ns2}" tc filter add dev veth2 ingress \
bpf direct-action object-file ./test_tc_tunnel.o section decap
-server_listen
echo "test bpf encap with bpf decap"
client_connect
verify_data
#include "../../../include/linux/filter.h"
#define MAX_INSNS BPF_MAXINSNS
+#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 14
+#define MAX_NR_MAPS 17
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
+ struct bpf_insn *fill_insns;
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
int fixup_cgroup_storage[MAX_FIXUPS];
int fixup_percpu_cgroup_storage[MAX_FIXUPS];
int fixup_map_spin_lock[MAX_FIXUPS];
+ int fixup_map_array_ro[MAX_FIXUPS];
+ int fixup_map_array_wo[MAX_FIXUPS];
+ int fixup_map_array_small[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t retval, retval_unpriv, insn_processed;
+ int prog_len;
enum {
UNDEF,
ACCEPT,
static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
{
- /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+ /* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51 */
#define PUSH_CNT 51
- unsigned int len = BPF_MAXINSNS;
- struct bpf_insn *insn = self->insns;
+ /* jump range is limited to 16 bit. PUSH_CNT of ld_abs needs room */
+ unsigned int len = (1 << 15) - PUSH_CNT * 2 * 5 * 6;
+ struct bpf_insn *insn = self->fill_insns;
int i = 0, j, k = 0;
insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
for (; i < len - 1; i++)
insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
insn[len - 1] = BPF_EXIT_INSN();
+ self->prog_len = len;
}
static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
{
- struct bpf_insn *insn = self->insns;
- unsigned int len = BPF_MAXINSNS;
+ struct bpf_insn *insn = self->fill_insns;
+ /* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */
+ unsigned int len = (1 << 15) / 6;
int i = 0;
insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
while (i < len - 1)
insn[i++] = BPF_LD_ABS(BPF_B, 1);
insn[i] = BPF_EXIT_INSN();
+ self->prog_len = i + 1;
}
static void bpf_fill_rand_ld_dw(struct bpf_test *self)
{
- struct bpf_insn *insn = self->insns;
+ struct bpf_insn *insn = self->fill_insns;
uint64_t res = 0;
int i = 0;
insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
insn[i] = BPF_EXIT_INSN();
+ self->prog_len = i + 1;
res ^= (res >> 32);
self->retval = (uint32_t)res;
}
+/* test the sequence of 1k jumps
+ *
+ * Fills self->fill_insns with: a MOV saving R1 in R6, then 1024 groups of
+ * four insns (call get_prandom_u32; JGT on R0 against a bpf_semi_rand_get()
+ * immediate with jump offset 2; R1 = R10; 8-byte store of R6 through R1),
+ * then 32-bit MOV R0 = 42 padding and a final EXIT. Also sets
+ * self->prog_len to the number of insns emitted and self->retval to 42.
+ */
+static void bpf_fill_scale1(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+ int i = 0, k = 0;
+
+ insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ /* test to check that the sequence of 1024 jumps is acceptable
+ * k is bumped in the loop condition, so the body sees k = 1..1024 and
+ * the store offsets are multiples of 8 between -8 and -512.
+ */
+ while (k++ < 1024) {
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_prandom_u32);
+ insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
+ insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
+ -8 * (k % 64 + 1));
+ }
+ /* every jump adds 1024 steps to insn_processed, so to stay exactly
+ * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+ * (the loop pads until the insn index reaches MAX_TEST_INSNS - 1025,
+ * counting the insns already emitted above)
+ */
+ while (i < MAX_TEST_INSNS - 1025)
+ insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+ insn[i] = BPF_EXIT_INSN();
+ self->prog_len = i + 1;
+ self->retval = 42;
+}
+
+/* test the sequence of 1k jumps in inner most function (function depth 8)
+ *
+ * Fills self->fill_insns with FUNC_NEST pairs of { BPF_CALL_REL(1); EXIT }
+ * followed by the same kind of jump sequence, MOV padding and EXIT that
+ * bpf_fill_scale1() emits. Sets self->prog_len to the number of insns
+ * emitted and self->retval to 42.
+ */
+static void bpf_fill_scale2(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+ int i = 0, k = 0;
+
+#define FUNC_NEST 7
+ for (k = 0; k < FUNC_NEST; k++) {
+ insn[i++] = BPF_CALL_REL(1);
+ insn[i++] = BPF_EXIT_INSN();
+ }
+ insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ /* test to check that the sequence of 1024 jumps is acceptable
+ * NOTE(review): k is not reset after the FUNC_NEST loop above, so it
+ * enters this loop as FUNC_NEST (7) and the body runs 1024 - 7 = 1017
+ * times, not 1024 -- confirm whether a "k = 0" was intended here.
+ * Store offsets are multiples of 8 between -8 and
+ * -8 * (64 - 4 * FUNC_NEST), i.e. -288.
+ */
+ while (k++ < 1024) {
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_prandom_u32);
+ insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
+ insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
+ -8 * (k % (64 - 4 * FUNC_NEST) + 1));
+ }
+ /* every jump adds 1024 steps to insn_processed, so to stay exactly
+ * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+ * (the loop pads until the insn index reaches MAX_TEST_INSNS - 1025,
+ * counting the call/exit pairs and jump groups already emitted above)
+ */
+ while (i < MAX_TEST_INSNS - 1025)
+ insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+ insn[i] = BPF_EXIT_INSN();
+ self->prog_len = i + 1;
+ self->retval = 42;
+}
+
+static void bpf_fill_scale(struct bpf_test *self)
+{
+ switch (self->retval) { /* .retval of the test entry doubles as the
+ * generator selector; the chosen generator
+ * then overwrites it with 42
+ */
+ case 1: /* "scale: scale test 1" */
+ return bpf_fill_scale1(self);
+ case 2: /* "scale: scale test 2" */
+ return bpf_fill_scale2(self);
+ default: /* unknown selector: report an empty program */
+ self->prog_len = 0;
+ break;
+ }
+}
+
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \
return false;
}
-static int create_map(uint32_t type, uint32_t size_key,
- uint32_t size_value, uint32_t max_elem)
+static int __create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem,
+ uint32_t extra_flags)
{
int fd;
fd = bpf_create_map(type, size_key, size_value, max_elem,
- type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
+ (type == BPF_MAP_TYPE_HASH ?
+ BPF_F_NO_PREALLOC : 0) | extra_flags);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
return fd;
}
+static int create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem)
+{
+ return __create_map(type, size_key, size_value, max_elem, 0); /* 0: no extra_flags */
+}
+
static void update_map(int fd, int index)
{
struct test_val value = {
int *fixup_cgroup_storage = test->fixup_cgroup_storage;
int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
int *fixup_map_spin_lock = test->fixup_map_spin_lock;
+ int *fixup_map_array_ro = test->fixup_map_array_ro;
+ int *fixup_map_array_wo = test->fixup_map_array_wo;
+ int *fixup_map_array_small = test->fixup_map_array_small;
- if (test->fill_helper)
+ if (test->fill_helper) {
+ test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
test->fill_helper(test);
+ }
/* Allocating HTs with 1 elem is fine here, since we only test
* for verifier and not do a runtime lookup, so the only thing
fixup_map_spin_lock++;
} while (*fixup_map_spin_lock);
}
+ if (*fixup_map_array_ro) {
+ map_fds[14] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1,
+ BPF_F_RDONLY_PROG);
+ update_map(map_fds[14], 0);
+ do {
+ prog[*fixup_map_array_ro].imm = map_fds[14];
+ fixup_map_array_ro++;
+ } while (*fixup_map_array_ro);
+ }
+ if (*fixup_map_array_wo) {
+ map_fds[15] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1,
+ BPF_F_WRONLY_PROG);
+ update_map(map_fds[15], 0);
+ do {
+ prog[*fixup_map_array_wo].imm = map_fds[15];
+ fixup_map_array_wo++;
+ } while (*fixup_map_array_wo);
+ }
+ if (*fixup_map_array_small) {
+ map_fds[16] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ 1, 1, 0);
+ update_map(map_fds[16], 0);
+ do {
+ prog[*fixup_map_array_small].imm = map_fds[16];
+ fixup_map_array_small++;
+ } while (*fixup_map_array_small);
+ }
}
static int set_admin(bool admin)
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
fixup_skips = skips;
do_test_fixup(test, prog_type, prog, map_fds);
+ if (test->fill_insns) {
+ prog = test->fill_insns;
+ prog_len = test->prog_len;
+ } else {
+ prog_len = probe_filter_length(prog);
+ }
/* If there were some map skips during fixup due to missing bpf
* features, skip this test.
*/
if (fixup_skips != skips)
return;
- prog_len = probe_filter_length(prog);
pflags = 0;
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
pflags |= BPF_F_ANY_ALIGNMENT;
fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
- "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto fail_log;
}
close_fds:
+ if (test->fill_insns)
+ free(test->fill_insns);
close(fd_prog);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
int start = 0, end = sym_cnt;
int result;
+ /* kallsyms not loaded. return NULL */
+ if (sym_cnt <= 0)
+ return NULL;
+
while (start < end) {
size_t mid = start + (end - start) / 2;
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "valid read map access into a read-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_ro = { 3 },
+ .result = ACCEPT,
+ .retval = 28,
+},
+{
+ "valid read map access into a read-only array 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_ro = { 3 },
+ .result = ACCEPT,
+ .retval = -29,
+},
+{
+ "invalid write map access into a read-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_ro = { 3 },
+ .result = REJECT,
+ .errstr = "write into map forbidden",
+},
+{
+ "invalid write map access into a read-only array 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_ro = { 4 },
+ .result = REJECT,
+ .errstr = "write into map forbidden",
+},
+{
+ "valid write map access into a write-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_wo = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "valid write map access into a write-only array 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_wo = { 4 },
+ .result = ACCEPT,
+ .retval = 0,
+},
+{
+ "invalid read map access into a write-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_wo = { 3 },
+ .result = REJECT,
+ .errstr = "read from map forbidden",
+},
+{
+ "invalid read map access into a write-only array 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_wo = { 3 },
+ .result = REJECT,
+ .errstr = "read from map forbidden",
+},
.errstr = "invalid bpf_context access",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"check cb access: half, wrong type",
--- /dev/null
+{
+ "direct map access, write test 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 32),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "R1 min value is outside of the array range",
+},
+{
+ "direct map access, write test 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, -1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "direct value offset of 4294967295 is not allowed",
+},
+{
+ "direct map access, write test 8",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, -1, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 9",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer",
+},
+{
+ "direct map access, write test 10",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 11",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer",
+},
+{
+ "direct map access, write test 12",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "direct value offset of 536870912 is not allowed",
+},
+{
+ "direct map access, write test 13",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)-1),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer, value_size=48 off=536870911",
+},
+{
+ "direct map access, write test 14",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = ACCEPT,
+ .retval = 0xff,
+},
+{
+ "direct map access, write test 15",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = ACCEPT,
+ .retval = 0xffff,
+},
+{
+ "direct map access, write test 16",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 47),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=47 size=2",
+},
+{
+ "direct map access, write test 17",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 1, 0xffff),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=47 size=2",
+},
+{
+ "direct map access, write test 18",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_ST_MEM(BPF_H, BPF_REG_1, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_small = { 1 },
+ .result = REJECT,
+ .errstr = "R1 min value is outside of the array range",
+},
+{
+ "direct map access, write test 19",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_small = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 20",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_small = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer",
+},
+{
+ "direct map access, invalid insn test 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, 1, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 1, 0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+ "direct map access, invalid insn test 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, 0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+ "direct map access, invalid insn test 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, ~0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, ~0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+ "direct map access, invalid insn test 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, ~0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 8",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, ~0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 9",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "unrecognized bpf_ld_imm64 insn",
+},
--- /dev/null
+{
+ "ARG_PTR_TO_LONG uninitialized",
+ .insns = {
+ /* bpf_strtoul arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* bpf_strtoul arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* bpf_strtoul arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* bpf_strtoul arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ /* bpf_strtoul() */
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+ .errstr = "invalid indirect read from stack off -16+0 size 8",
+},
+{
+ "ARG_PTR_TO_LONG half-uninitialized",
+ .insns = {
+ /* bpf_strtoul arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* bpf_strtoul arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* bpf_strtoul arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* bpf_strtoul arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ /* bpf_strtoul() */
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+ .errstr = "invalid indirect read from stack off -16+4 size 8",
+},
+{
+ "ARG_PTR_TO_LONG misaligned",
+ .insns = {
+ /* bpf_strtoul arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* bpf_strtoul arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* bpf_strtoul arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* bpf_strtoul arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ /* bpf_strtoul() */
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+ .errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8",
+},
+{
+ "ARG_PTR_TO_LONG size < sizeof(long)",
+ .insns = {
+ /* bpf_strtoul arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* bpf_strtoul arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* bpf_strtoul arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* bpf_strtoul arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ /* bpf_strtoul() */
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+ .errstr = "invalid stack type R4 off=-4 access_size=8",
+},
+{
+ "ARG_PTR_TO_LONG initialized",
+ .insns = {
+ /* bpf_strtoul arg1 (buf) */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+ /* bpf_strtoul arg2 (buf_len) */
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+
+ /* bpf_strtoul arg3 (flags) */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+
+ /* bpf_strtoul arg4 (res) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+ /* bpf_strtoul() */
+ BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+},
.result = ACCEPT,
.retval = 5,
},
+{
+ "ld_dw: xor semi-random 64 bit imms, test 5",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1000000 - 6,
+},
--- /dev/null
+{
+ "scale: scale test 1",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_scale,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "scale: scale test 2",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_scale,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+},
.prog_type = BPF_PROG_TYPE_LWT_IN,
},
{
- "indirect variable-offset stack access",
+ "indirect variable-offset stack access, unbounded",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_3, 28),
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops,
+ bytes_received)),
+ /* Check the lower bound but don't check the upper one. */
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4),
+ /* Point the lower bound to initialized stack. Offset is now in range
+ * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.
+ */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_5, 8),
+ /* Dereference it indirectly. */
+ BPF_EMIT_CALL(BPF_FUNC_getsockopt),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 unbounded indirect variable offset stack access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+},
+{
+ "indirect variable-offset stack access, max out of bound",
.insns = {
/* Fill the top 8 bytes of the stack */
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 5 },
- .errstr = "variable stack read R2",
+ .errstr = "R2 max value is outside of stack bound",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+ "indirect variable-offset stack access, min out of bound",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516),
+ /* add it to fp. We now have either fp-516 or fp-512, but
+ * we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* dereference it indirectly */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 5 },
+ .errstr = "R2 min value is outside of stack bound",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+ "indirect variable-offset stack access, max_off+size > max_initialized",
+ .insns = {
+ /* Fill only the second from top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-12 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+ "indirect variable-offset stack access, min_off < min_initialized",
+ .insns = {
+ /* Fill only the top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. Only fp-8..fp-1 was written above, so fp-16 size 8 is
+ * entirely uninitialized stack and fp-12 size 8 is partially
+ * uninitialized.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
 .result = REJECT,
 .prog_type = BPF_PROG_TYPE_LWT_IN,
 },
+{
+ "indirect variable-offset stack access, priv vs unpriv",
+ .insns = {
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 6 },
+ .errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "indirect variable-offset stack access, uninitialized",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_3, 28),
+ /* Fill the top 16 bytes of the stack, except fp-12..fp-9: the first
+ * store below is only 4 bytes wide (BPF_W).
+ */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way an 8-byte range (R5 = 8 below) starting
+ * there covers fp-12..fp-9, which was never written -- hence the
+ * expected rejection.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_5, 8),
+ /* Dereference it indirectly. */
+ BPF_EMIT_CALL(BPF_FUNC_getsockopt),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+},
+{
+ "indirect variable-offset stack access, ok",
+ .insns = {
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 6 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
ALL_TESTS="
rif_set_addr_test
+ rif_vrf_set_addr_test
rif_inherit_bridge_addr_test
rif_non_inherit_bridge_addr_test
vlan_interface_deletion_test
lag_dev_deletion_test
vlan_interface_uppers_test
bridge_extern_learn_test
+ neigh_offload_test
devlink_reload_test
"
NUM_NETIFS=2
ip link set dev $swp1 addr $swp1_mac
}
+rif_vrf_set_addr_test()
+{
+ # Test that it is possible to set an IP address on a VRF upper despite
+ # its random MAC address.
+ RET=0
+
+ ip link add name vrf-test type vrf table 10
+ ip link set dev $swp1 master vrf-test
+
+ ip -4 address add 192.0.2.1/24 dev vrf-test
+ check_err $? "failed to set IPv4 address on VRF"
+ ip -6 address add 2001:db8:1::1/64 dev vrf-test
+ check_err $? "failed to set IPv6 address on VRF"
+
+ log_test "RIF - setting IP address on VRF"
+
+ ip link del dev vrf-test
+}
+
rif_inherit_bridge_addr_test()
{
RET=0
ip link del dev br0
}
+neigh_offload_test()
+{
+ # Test that IPv4 and IPv6 neighbour entries are marked as offloaded
+ RET=0
+
+ ip -4 address add 192.0.2.1/24 dev $swp1
+ ip -6 address add 2001:db8:1::1/64 dev $swp1
+
+ ip -4 neigh add 192.0.2.2 lladdr de:ad:be:ef:13:37 nud perm dev $swp1
+ ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
+ dev $swp1
+
+ ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload
+ check_err $? "ipv4 neigh entry not marked as offloaded when should"
+ ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload
+ check_err $? "ipv6 neigh entry not marked as offloaded when should"
+
+ log_test "neighbour offload indication"
+
+ ip -6 neigh del 2001:db8:1::2 dev $swp1
+ ip -4 neigh del 192.0.2.2 dev $swp1
+ ip -6 address del 2001:db8:1::1/64 dev $swp1
+ ip -4 address del 192.0.2.1/24 dev $swp1
+}
+
devlink_reload_test()
{
# Test that after executing all the above configuration tests, a
+include ../../../../scripts/Kbuild.include
+
all:
top_srcdir = ../../../..
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
-LDFLAGS += -pthread -no-pie
+
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += -pthread $(no-pie-option)
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+/*
+ * APIC register offsets and ICR field values. smm_test addresses the ICR
+ * as MSR APIC_BASE_MSR + (APIC_ICR >> 4) and ORs APIC_DEST_*, APIC_INT_*
+ * and APIC_DM_* values together to form the value it writes.
+ */
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
if (vm->kvm_fd < 0)
exit(KSFT_SKIP);
+ if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
+ fprintf(stderr, "immediate_exit not available, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
nested_size, sizeof(state->nested_));
}
+ /*
+ * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+ * guest state is consistent only after userspace re-enters the
+ * kernel with KVM_RUN. Complete IO prior to migrating state
+ * to a new VM.
+ */
+ vcpu_run_complete_io(vm, vcpuid);
+
nmsrs = kvm_get_num_msrs(vm);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
- }
-
r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
r);
+
+ if (state->nested.size) {
+ r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+ r);
+ }
}
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
kvm_vm_release(vm);
/* Restore state in a new VM. */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID 1
+
+#define PAGE_SIZE 4096
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * Raw machine code of the SMI handler. The rest of this file is compiled as
+ * normal 64-bit code, but the SMI handler is executed in real-address mode,
+ * so to keep things simple it is limited to a mode-independent subset of
+ * instructions, spelled out byte by byte.
+ *
+ * The handler always reports the fixed stage SMRAM_STAGE back to the host
+ * through SYNC_PORT and then executes rsm. main() copies these bytes to
+ * offset 0x8000 of the SMRAM region.
+ */
+uint8_t smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
+/*
+ * Report @phase to the host: the value is placed in rax and a port read on
+ * SYNC_PORT is executed. main() expects each such access to surface as
+ * KVM_EXIT_IO and takes the stage from the low byte of rax.
+ *
+ * "in" overwrites %al, so rax is declared as a read-write operand ("+a").
+ * With an input-only constraint the compiler may assume the register still
+ * holds @phase after the asm statement.
+ */
+void sync_with_host(uint64_t phase)
+{
+	asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+		     : "+a" (phase));
+}
+
+/*
+ * Write the ICR through MSR APIC_BASE_MSR + (APIC_ICR >> 4) with
+ * APIC_DEST_SELF, APIC_INT_ASSERT and delivery mode APIC_DM_SMI, i.e.
+ * request an SMI aimed at the calling CPU itself. guest_code() sets
+ * X2APIC_ENABLE in MSR_IA32_APICBASE before the first call.
+ */
+void self_smi(void)
+{
+ wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+/*
+ * Guest entry point. Walks through a fixed sequence of stages and reports
+ * each one to the host with sync_with_host(). Stages 3 and 6 are never
+ * reported from here: each self_smi() is expected to run smi_handler, which
+ * reports SMRAM_STAGE instead, and the host loop in main() counts that exit
+ * as a stage of its own.
+ *
+ * @vmx_pages: argument installed by main(); when non-NULL the SMI is
+ * repeated after prepare_for_vmx_operation(), otherwise that part is
+ * skipped.
+ */
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+
+ sync_with_host(1);
+
+ /* self_smi() writes the ICR through an MSR, so set X2APIC_ENABLE first */
+ wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+ sync_with_host(2);
+
+ /* Host sees SMRAM_STAGE from the handler where it expects stage 3 */
+ self_smi();
+
+ sync_with_host(4);
+
+ if (vmx_pages) {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+ sync_with_host(5);
+
+ /* Second SMI, after prepare_for_vmx_operation(); takes the place of stage 6 */
+ self_smi();
+
+ sync_with_host(7);
+ }
+
+ sync_with_host(DONE);
+}
+
+/*
+ * Host side of the SMM test.
+ *
+ * Creates a VM running guest_code(), adds a separate memory slot at
+ * SMRAM_GPA, copies smi_handler to offset 0x8000 inside it and points
+ * MSR_IA32_SMBASE at it. The guest is then run one stage at a time: every
+ * exit must be KVM_EXIT_IO, and the stage reported in the low byte of rax
+ * must be either the expected stage number or SMRAM_STAGE (reported by the
+ * SMI handler). After each stage the vCPU state is saved, the VM is
+ * released and restarted, and the state is loaded into a fresh vCPU, so
+ * save/restore is exercised at every stage, including the SMRAM_STAGE one.
+ * The loop ends when the guest reports DONE.
+ */
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages = NULL;
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	int stage, stage_reported;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Back SMRAM with its own memory slot and install the SMI handler */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+				    SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+	TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+		    == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+	memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
+	memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+	       sizeof(smi_handler));
+
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+
+	/* A zero argument makes guest_code() skip its VMX part */
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	} else {
+		printf("will skip SMM test with VMX enabled\n");
+		vcpu_args_set(vm, VCPU_ID, 1, 0);
+	}
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs, 0, sizeof(regs));
+		vcpu_regs_get(vm, VCPU_ID, &regs);
+
+		/* The guest and the SMI handler report the stage in %al */
+		stage_reported = regs.rax & 0xff;
+
+		if (stage_reported == DONE)
+			goto done;
+
+		TEST_ASSERT(stage_reported == stage ||
+			    stage_reported == SMRAM_STAGE,
+			    "Unexpected stage: #%x, got %x",
+			    stage, stage_reported);
+
+		/* Save state, recreate the VM and vCPU, then restore into them */
+		state = vcpu_save_state(vm, VCPU_ID);
+		kvm_vm_release(vm);
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID, 0, 0);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- fprintf(stderr, "immediate_exit not available, skipping test\n");
- exit(KSFT_SKIP);
- }
-
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
stage, (ulong)uc.args[1]);
- /*
- * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
- * guest state is consistent only after userspace re-enters the
- * kernel with KVM_RUN. Complete IO prior to migrating state
- * to a new VM.
- */
- vcpu_run_complete_io(vm, VCPU_ID);
-
+ state = vcpu_save_state(vm, VCPU_ID);
memset(&regs1, 0, sizeof(regs1));
vcpu_regs_get(vm, VCPU_ID, &regs1);
- state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
/* Restore state in a new VM. */
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw"
+
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
{
set -e
ip netns add ns1
+ ip netns set ns1 auto
$IP link set dev lo up
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
return $rc
}
+# Compare route output against the expected route string.
+#   $1 - output of an "ip route" listing (may span several lines)
+#   $2 - expected entry as a single line
+# Returns 0 when they match, either verbatim or after collapsing all white
+# space in the output to single spaces; returns 1 otherwise. With VERBOSE=1
+# the mismatch is printed. Route text is always passed to printf as an
+# argument, never as the format, so '%' or '\' in it is printed literally.
+check_expected()
+{
+	local out="$1"
+	local expected="$2"
+	local rc=0
+
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf " %s\n" "${expected}"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf " Unexpected route entry. Have:\n"
+			printf " %s\n" "${out}"
+			printf " Expected:\n"
+			printf " %s\n\n" "${expected}"
+		fi
+	fi
+
+	return $rc
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route6()
pfx=$1
out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
- [ "${out}" = "${expected}" ] && return 0
-
- if [ -z "${out}" ]; then
- if [ "$VERBOSE" = "1" ]; then
- printf "\nNo route entry found\n"
- printf "Expected:\n"
- printf " ${expected}\n"
- fi
- return 1
- fi
-
- # tricky way to convert output to 1-line without ip's
- # messy '\'; this drops all extra white space
- out=$(echo ${out})
- if [ "${out}" != "${expected}" ]; then
- rc=1
- if [ "${VERBOSE}" = "1" ]; then
- printf " Unexpected route entry. Have:\n"
- printf " ${out}\n"
- printf " Expected:\n"
- printf " ${expected}\n\n"
- fi
- fi
-
- return $rc
+ check_expected "${out}" "${expected}"
}
route_cleanup()
set -e
ip netns add ns2
+ ip netns set ns2 auto
ip -netns ns2 link set dev lo up
ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
ip -netns ns2 addr add 172.16.103.2/24 dev veth4
ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
- set +ex
+ set +e
}
# assumption is that basic add of a single path route works
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
- check_route6 ""
+ out=$($IP -6 ro ls match 2001:db8:104::/64)
+ check_expected "${out}" ""
rc=$?
fi
log_test $rc 0 "Prefix route removed on link down"
local pfx
local expected="$1"
local out
- local rc=0
set -- $expected
pfx=$1
[ "${pfx}" = "unreachable" ] && pfx=$2
out=$($IP ro ls match ${pfx})
- [ "${out}" = "${expected}" ] && return 0
-
- if [ -z "${out}" ]; then
- if [ "$VERBOSE" = "1" ]; then
- printf "\nNo route entry found\n"
- printf "Expected:\n"
- printf " ${expected}\n"
- fi
- return 1
- fi
-
- # tricky way to convert output to 1-line without ip's
- # messy '\'; this drops all extra white space
- out=$(echo ${out})
- if [ "${out}" != "${expected}" ]; then
- rc=1
- if [ "${VERBOSE}" = "1" ]; then
- printf " Unexpected route entry. Have:\n"
- printf " ${out}\n"
- printf " Expected:\n"
- printf " ${expected}\n\n"
- fi
- fi
-
- return $rc
+ check_expected "${out}" "${expected}"
}
# assumption is that basic add of a single path route works
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
- check_route ""
+ out=$($IP ro ls match 172.16.104.0/24)
+ check_expected "${out}" ""
rc=$?
fi
log_test $rc 0 "Prefix route removed on link down"
route_cleanup
}
+# IPv4 routes whose gateway is an IPv6 address ("via inet6 ...").
+# Covers a single path route (add, ping through it, delete) and multipath
+# routes mixing an IPv6 and an IPv4 nexthop in both orders, including the
+# check that a delete listing the nexthops in the wrong order is rejected.
+# Uses route_setup/route_cleanup for the namespaces.
+ipv4_route_v6_gw_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 route with IPv6 gateway tests"
+
+	route_setup
+	sleep 2
+
+	#
+	# single path route
+	#
+	run_cmd "$IP ro add 172.16.104.0/24 via inet6 2001:db8:101::2"
+	rc=$?
+	log_test $rc 0 "Single path route with IPv6 gateway"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
+	fi
+
+	# Log the exit status of the ping itself, taken directly from
+	# run_cmd like the other run_cmd/log_test pairs below, rather than
+	# depending on $rc having been refreshed as a side effect.
+	run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
+	log_test $? 0 "Single path route with IPv6 gateway - ping"
+
+	run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
+	rc=$?
+	log_test $rc 0 "Single path route delete"
+	if [ $rc -eq 0 ]; then
+		# NOTE(review): this checks 172.16.112.0/24 although the route
+		# deleted above is 172.16.104.0/24, and the result is not
+		# used -- confirm what was intended.
+		check_route "172.16.112.0/24"
+	fi
+
+	#
+	# multipath - v6 then v4
+	#
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+	rc=$?
+	log_test $rc 0 "Multipath route add - v6 nexthop then v4"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	fi
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+	log_test $? 2 " Multipath route delete - nexthops in wrong order"
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+	log_test $? 0 " Multipath route delete exact match"
+
+	#
+	# multipath - v4 then v6
+	#
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+	rc=$?
+	log_test $rc 0 "Multipath route add - v4 nexthop then v6"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 weight 1 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1"
+	fi
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+	log_test $? 2 " Multipath route delete - nexthops in wrong order"
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+	log_test $? 0 " Multipath route delete exact match"
+
+	route_cleanup
+}
################################################################################
# usage
ipv4_addr_metric) ipv4_addr_metric_test;;
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
+ ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+PAUSE_ON_FAIL=no
+VERBOSE=0
+TRACING=0
+
# Some systems don't have a ping6 binary anymore
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
err_buf=
}
+# Run a command, capturing stdout and stderr together, and return its exit
+# status. With VERBOSE=1 the command line and any captured output are
+# printed.
+#
+# The arguments are joined into one string and split again on white space
+# when executed; quotes are NOT re-parsed, so quote characters embedded in an
+# argument reach the command literally. $cmd, $out and $rc are not declared
+# local and remain set after the call.
+run_cmd() {
+	cmd="$*"
+
+	if [ "$VERBOSE" = "1" ]; then
+		# Print the command as data, not as the printf format
+		printf " COMMAND: %s\n" "$cmd"
+	fi
+
+	out="$($cmd 2>&1)"
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo " $out"
+		echo
+	fi
+
+	return $rc
+}
+
# Find the auto-generated name for this namespace
nsname() {
eval echo \$NS_$1
fi
fi
- ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
- ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+ run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
+ run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
- ${ns_b} ip fou add port 5556 ipproto ${ipproto}
- ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
+ run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
+ run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
if [ "${inner}" = "4" ]; then
- ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
- ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
else
- ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
- ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
fi
- ${ns_a} ip link set ${encap}_a up
- ${ns_b} ip link set ${encap}_b up
+ run_cmd ${ns_a} ip link set ${encap}_a up
+ run_cmd ${ns_b} ip link set ${encap}_b up
}
setup_fou44() {
}
setup_veth() {
- ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
- ${ns_a} ip link set veth_b netns ${NS_B}
+ run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+ run_cmd ${ns_a} ip link set veth_b netns ${NS_B}
- ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
- ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+ run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+ run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
- ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
- ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+ run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+ run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
- ${ns_a} ip link set veth_a up
- ${ns_b} ip link set veth_b up
+ run_cmd ${ns_a} ip link set veth_a up
+ run_cmd ${ns_b} ip link set veth_b up
}
setup_vti() {
[ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
- ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
- ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+ run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+ run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
- ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
- ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+ run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+ run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
- ${ns_a} ip link set vti${proto}_a up
- ${ns_b} ip link set vti${proto}_b up
+ run_cmd ${ns_a} ip link set vti${proto}_a up
+ run_cmd ${ns_b} ip link set vti${proto}_b up
}
setup_vti4() {
opts_b=""
fi
- ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
- ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
+ run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
+ run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
- ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
- ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
- ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
- ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
- ${ns_a} ip link set ${type}_a up
- ${ns_b} ip link set ${type}_b up
+ run_cmd ${ns_a} ip link set ${type}_a up
+ run_cmd ${ns_b} ip link set ${type}_b up
}
setup_geneve4() {
veth_a_addr="${2}"
veth_b_addr="${3}"
- ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
- ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
- ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ # No outer quotes: run_cmd only word-splits its arguments and does not
+ # re-parse quotes, so the algorithm name must be quoted for this shell.
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
- ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
- ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ # No outer quotes: run_cmd only word-splits its arguments and does not
+ # re-parse quotes, so the algorithm name must be quoted for this shell.
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
setup_xfrm4() {
}
trace() {
- [ $tracing -eq 0 ] && return
+ [ $TRACING -eq 0 ] && return
for arg do
[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
mtu "${ns_b}" veth_B-R2 1500
# Create route exceptions
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} > /dev/null
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
# Check that exceptions have been created with the correct PMTU
pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
# Decrease remote MTU on path via R2, get new exception
mtu "${ns_r2}" veth_R2-B 400
mtu "${ns_b}" veth_B-R2 400
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
# Get new exception
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
# Check that exception was created
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
# Check that exception was created
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
# Send DF packet without exceeding link layer MTU, check that no
# exception is created
- ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
# Now exceed link layer MTU by one byte, check that exception is created
# with the right PMTU value
- ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
mtu "${ns_b}" veth_b 4000
mtu "${ns_a}" vti6_a 5000
mtu "${ns_b}" vti6_b 5000
- ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null
+ run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
# Check that exception was created
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
test_pmtu_vti4_link_add_mtu() {
setup namespaces || return 2
- ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
[ $? -ne 0 ] && err " vti not supported" && return 2
- ${ns_a} ip link del vti4_a
+ run_cmd ${ns_a} ip link del vti4_a
fail=0
max=$((65535 - 20))
# Check invalid values first
for v in $((min - 1)) $((max + 1)); do
- ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+ run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
# This can fail, or MTU can be adjusted to a proper value
[ $? -ne 0 ] && continue
mtu="$(link_get_mtu "${ns_a}" vti4_a)"
err " vti tunnel created with invalid MTU ${mtu}"
fail=1
fi
- ${ns_a} ip link del vti4_a
+ run_cmd ${ns_a} ip link del vti4_a
done
# Now check valid values
for v in ${min} 1300 ${max}; do
- ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
mtu="$(link_get_mtu "${ns_a}" vti4_a)"
- ${ns_a} ip link del vti4_a
+ run_cmd ${ns_a} ip link del vti4_a
if [ "${mtu}" != "${v}" ]; then
err " vti MTU ${mtu} doesn't match configured value ${v}"
fail=1
test_pmtu_vti6_link_add_mtu() {
setup namespaces || return 2
- ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
[ $? -ne 0 ] && err " vti6 not supported" && return 2
- ${ns_a} ip link del vti6_a
+ run_cmd ${ns_a} ip link del vti6_a
fail=0
max=$((65535 - 40))
# Check invalid values first
for v in $((min - 1)) $((max + 1)); do
- ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+ run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
# This can fail, or MTU can be adjusted to a proper value
[ $? -ne 0 ] && continue
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
err " vti6 tunnel created with invalid MTU ${v}"
fail=1
fi
- ${ns_a} ip link del vti6_a
+ run_cmd ${ns_a} ip link del vti6_a
done
# Now check valid values
for v in 68 1280 1300 $((65535 - 40)); do
- ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
- ${ns_a} ip link del vti6_a
+ run_cmd ${ns_a} ip link del vti6_a
if [ "${mtu}" != "${v}" ]; then
err " vti6 MTU ${mtu} doesn't match configured value ${v}"
fail=1
test_pmtu_vti6_link_change_mtu() {
setup namespaces || return 2
- ${ns_a} ip link add dummy0 mtu 1500 type dummy
+ run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
[ $? -ne 0 ] && err " dummy not supported" && return 2
- ${ns_a} ip link add dummy1 mtu 3000 type dummy
- ${ns_a} ip link set dummy0 up
- ${ns_a} ip link set dummy1 up
+ run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
+ run_cmd ${ns_a} ip link set dummy0 up
+ run_cmd ${ns_a} ip link set dummy1 up
- ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
- ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+ run_cmd ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+ run_cmd ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
fail=0
# Create vti6 interface bound to device, passing MTU, check it
- ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
if [ ${mtu} -ne 1300 ]; then
err " vti6 MTU ${mtu} doesn't match configured value 1300"
# Move to another device with different MTU, without passing MTU, check
# MTU is adjusted
- ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+ run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
if [ ${mtu} -ne $((3000 - 40)) ]; then
err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
fi
# Move it back, passing MTU, check MTU is not overridden
- ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
if [ ${mtu} -ne 1280 ]; then
err " vti6 MTU ${mtu} doesn't match configured value 1280"
# Fill exception cache for multiple CPUs (2)
# we can always use inner IPv4 for that
for cpu in ${cpu_list}; do
- taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null
+ run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
done
${ns_a} ip link del dev veth_A-R1 &
exit 1
}
+################################################################################
+#
exitcode=0
desc=0
+
+# Parse command-line options:
+#   -p  set PAUSE_ON_FAIL=yes (wait for enter after a failed test)
+#   -v  set VERBOSE=1
+#   -t  set TRACING=1, but only if tcpdump is found in PATH
+# Anything else calls usage. The parsed options are then shifted away so
+# that only the remaining arguments (test names) are left in "$@".
+while getopts :ptv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ t) if which tcpdump > /dev/null 2>&1; then
+ TRACING=1
+ else
+ echo "=== tcpdump not available, tracing disabled"
+ fi
+ ;;
+ *) usage;;
+ esac
+done
+shift $(($OPTIND-1))
+
IFS="
"
-tracing=0
for arg do
- if [ "${arg}" != "${arg#--*}" ]; then
- opt="${arg#--}"
- if [ "${opt}" = "trace" ]; then
- if which tcpdump > /dev/null 2>&1; then
- tracing=1
- else
- echo "=== tcpdump not available, tracing disabled"
- fi
- else
- usage
- fi
- else
- # Check first that all requested tests are available before
- # running any
- command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
- fi
+ # Check first that all requested tests are available before running any
+ command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
done
trap cleanup EXIT
(
unset IFS
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n##########################################################################\n\n"
+ fi
+
eval test_${name}
ret=$?
cleanup
printf "TEST: %-60s [ OK ]\n" "${t}"
elif [ $ret -eq 1 ]; then
printf "TEST: %-60s [FAIL]\n" "${t}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "Pausing. Hit enter to continue"
+ read a
+ fi
err_flush
exit 1
elif [ $ret -eq 2 ]; then
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.3
dstip=192.168.123.4
- dev=simx1
- sysfsd=/sys/kernel/debug/netdevsim/$dev
+ sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
sysfsf=$sysfsd/ipsec
+ sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
# setup netdevsim since dummydev doesn't have offload support
modprobe netdevsim
return 1
fi
- ip link add $dev type netdevsim
+ echo "0" > /sys/bus/netdevsim/new_device
+ while [ ! -d $sysfsnet ] ; do :; done
+ udevadm settle
+ dev=`ls $sysfsnet`
+
ip addr add $srcip dev $dev
ip link set $dev up
if [ ! -d $sysfsd ] ; then
fi
# clean up any leftovers
- ip link del $dev
rmmod netdevsim
if [ $ret -ne 0 ]; then
exit 0
fi
+ret=0
echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
./in_netns.sh ./psock_fanout
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ ret=1
else
echo "[PASS]"
fi
./in_netns.sh ./psock_tpacket
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ ret=1
else
echo "[PASS]"
fi
./in_netns.sh ./txring_overwrite
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ ret=1
else
echo "[PASS]"
fi
+exit $ret
./socket
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ exit 1
else
echo "[PASS]"
fi
-
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+ conntrack_icmp_related.sh
include ../lib.mk
--- /dev/null
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. making some packets get routed
+# rather than bridged even though they arrive on an interface that is
+# part of a bridge.
+
+# eth0 br0 eth0
+# setup is: ns1 <-> ns0 <-> ns2
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# Create the three namespaces used by the test.  Every early exit below
+# removes them again so that a skipped run does not leave ns0/ns1/ns2 behind.
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
+if [ $? -ne 0 ]; then
+	echo "SKIP: Can't create veth device"
+	for i in 0 1 2; do ip netns del ns$i; done
+	exit $ksft_skip
+fi
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 link set veth1 up
+
+ip -net ns0 link add br0 type bridge
+if [ $? -ne 0 ]; then
+	echo "SKIP: Can't create bridge br0"
+	for i in 0 1 2; do ip netns del ns$i; done
+	exit $ksft_skip
+fi
+
+ip -net ns0 link set veth0 master br0
+ip -net ns0 link set veth1 master br0
+ip -net ns0 link set br0 up
+ip -net ns0 addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ns1 and ns2 connected via ns0:br0
+for i in 1 2; do
+ ip -net ns$i link set lo up
+ ip -net ns$i link set eth0 up
+ ip -net ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+# Exercise the ebtables broute table on the bridge in ns0.
+#
+# Checks, in order:
+#  - with the broute redirect rule and forwarding off, ns1 cannot ping ns2
+#  - after enabling forwarding on veth0/veth1 the ping succeeds
+#  - after flushing the broute table the ping still succeeds (bridged)
+#  - an ebtables FORWARD drop rule for icmp makes the ping fail
+#  - re-adding the broute rule lets ns2 ping ns1 despite that drop rule
+#
+# Returns 0 on success, 1 when a check fails and $ksft_skip when the first
+# broute rule cannot be added.
+test_ebtables_broute()
+{
+	# redirect is needed so the dstmac is rewritten to the bridge itself,
+	# ip stack won't process OTHERHOST (foreign unicast mac) packets.
+	ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add ebtables broute redirect rule"
+		return $ksft_skip
+	fi
+
+	# ping ns2 from ns1, expected to not work (ip forwarding is off)
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		echo "ERROR: ping works, should have failed" 1>&2
+		return 1
+	fi
+
+	# enable forwarding on both interfaces.
+	# neither needs an ip address, but at least the bridge needs
+	# an ip address in same network segment as ns1 and ns2 (ns0
+	# needs to be able to determine route for to-be-forwarded packet).
+	ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1
+	ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+	sleep 1
+
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+		return 1
+	fi
+
+	echo "PASS: ns1/ns2 connectivity with active broute rule"
+	ip netns exec ns0 ebtables -t broute -F
+
+	# ping ns2 from ns1, expected to work (frames are bridged)
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+		return 1
+	fi
+
+	ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+	# ping ns2 from ns1, expected to not work (DROP in bridge forward)
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+		return 1
+	fi
+
+	# re-activate brouter
+	ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+	# ping ns1 from ns2, expected to work again (broute bypasses the drop)
+	ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+		return 1
+	fi
+
+	echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop"
+	return 0
+}
+
+# test basic connectivity
+ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null
+if [ $? -ne 0 ]; then
+	echo "ERROR: Could not reach ns2 from ns1" 1>&2
+	ret=1
+fi
+
+ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null
+if [ $? -ne 0 ]; then
+	echo "ERROR: Could not reach ns1 from ns2" 1>&2
+	ret=1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: netns connectivity: ns1 and ns2 can reach each other"
+fi
+
+test_ebtables_broute
+lret=$?
+# Do not let the broute result hide an earlier connectivity failure: only
+# adopt it when nothing has failed so far.
+if [ $ret -eq 0 ]; then
+	ret=$lret
+fi
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
--- /dev/null
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp errors are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# Delete the two client and the two router namespaces used by this test.
+cleanup() {
+	local netns
+
+	for netns in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+		ip netns del $netns
+	done
+}
+
+# Print 192.168.<$1>.2 without a trailing newline.
+ipv4() {
+	printf '%s' "192.168.$1.2"
+}
+
+# Print dead:<$1>::2 without a trailing newline.
+ipv6 () {
+	printf '%s' "dead:$1::2"
+}
+
+# check_counter NETNS NAME EXPECT
+#
+# List counter NAME of table "inet filter" inside namespace NETNS and look
+# for the string EXPECT in the listing.  Returns 0 when it is found;
+# otherwise prints an error and the actual listing to stderr and returns 1.
+check_counter()
+{
+	local ns=$1
+	local name=$2
+	local expect=$3
+	local lret=0
+
+	# grep -q prints nothing; only its exit status matters here.
+	ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect"
+	if [ $? -ne 0 ]; then
+		echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+		ip netns exec $ns nft list counter inet filter "$name" 1>&2
+		lret=1
+	fi
+
+	return $lret
+}
+
+# Check that the "unknown" counter is still at zero packets/bytes in all
+# four namespaces.  Returns 1 as soon as check_counter reports a mismatch,
+# 0 otherwise.
+check_unknown()
+{
+	local target
+
+	expect="packets 0 bytes 0"
+	for target in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+		check_counter $target "unknown" "$expect" || return 1
+	done
+
+	return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ ip netns add $n
+ ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+ ip -net nsclient$i link set $DEV up
+ ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+ ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+# Default routes for the clients: each points at the eth1 address of its
+# own router.  IPv6 routes are added with an explicit -6.
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 -6 route add default via dead:2::1
+
+# Router addresses; the 192.168.3.0/24 and dead:3::/64 networks sit on the
+# link between nsrouter1 (veth0) and nsrouter2 (eth2).
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 -6 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+ ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+
+# make sure NAT core rewrites address of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1 mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so its
+# related counter should be 0 (the counter is in the forward chain).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
+test_inet_nat=true
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
test_local_dnat6()
{
+ local family=$1
local lret=0
+ local IPF=""
+
+ if [ $family = "inet" ];then
+ IPF="ip6"
+ fi
+
ip netns exec ns0 nft -f - <<EOF
-table ip6 nat {
+table $family nat {
chain output {
type nat hook output priority 0; policy accept;
- ip6 daddr dead:1::99 dnat to dead:2::99
+ ip6 daddr dead:1::99 dnat $IPF to dead:2::99
}
}
EOF
if [ $? -ne 0 ]; then
- echo "SKIP: Could not add add ip6 dnat hook"
+ echo "SKIP: Could not add add $family dnat hook"
return $ksft_skip
fi
fi
done
- test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+ test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was $family NATted to ns2"
ip netns exec ns0 nft flush chain ip6 nat output
return $lret
test_local_dnat()
{
+ local family=$1
local lret=0
-ip netns exec ns0 nft -f - <<EOF
-table ip nat {
+ local IPF=""
+
+ if [ $family = "inet" ];then
+ IPF="ip"
+ fi
+
+ip netns exec ns0 nft -f - <<EOF 2>/dev/null
+table $family nat {
chain output {
type nat hook output priority 0; policy accept;
- ip daddr 10.0.1.99 dnat to 10.0.2.99
+ ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99
}
}
EOF
+ if [ $? -ne 0 ]; then
+ if [ $family = "inet" ];then
+ echo "SKIP: inet nat tests"
+ test_inet_nat=false
+ return $ksft_skip
+ fi
+ echo "SKIP: Could not add add $family dnat hook"
+ return $ksft_skip
+ fi
+
# ping netns1, expect rewrite to netns2
ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
if [ $? -ne 0 ]; then
fi
done
- test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+ test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2"
- ip netns exec ns0 nft flush chain ip nat output
+ ip netns exec ns0 nft flush chain $family nat output
reset_counters
ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
fi
done
- test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+ test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush"
return $lret
}
test_masquerade6()
{
+ local family=$1
+ local natflags=$1
local lret=0
ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
# add masquerading rule
ip netns exec ns0 nft -f - <<EOF
-table ip6 nat {
+table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
- meta oif veth0 masquerade
+ meta oif veth0 masquerade $natflags
}
}
EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+<<<<<<< HEAD
+ echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading"
+=======
+ echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags"
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
lret=1
fi
fi
done
+<<<<<<< HEAD
+ ip netns exec ns0 nft flush chain $family nat postrouting
+=======
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
ip netns exec ns0 nft flush chain ip6 nat postrouting
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
if [ $? -ne 0 ]; then
- echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+ echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+<<<<<<< HEAD
+ test $lret -eq 0 && echo "PASS: $family IPv6 masquerade for ns2"
+=======
+ test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2"
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
return $lret
}
test_masquerade()
{
+<<<<<<< HEAD
+ local family=$1
+=======
+ local natflags=$1
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
local lret=0
ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: canot ping ns1 from ns2"
+ echo "ERROR: cannot ping ns1 from ns2 $natflags"
lret=1
fi
# add masquerading rule
ip netns exec ns0 nft -f - <<EOF
-table ip nat {
+table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
- meta oif veth0 masquerade
+ meta oif veth0 masquerade $natflags
}
}
EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+<<<<<<< HEAD
+ echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading"
+=======
+ echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags"
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
lret=1
fi
fi
done
+<<<<<<< HEAD
+ ip netns exec ns0 nft flush chain $family nat postrouting
+=======
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
ip netns exec ns0 nft flush chain ip nat postrouting
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
if [ $? -ne 0 ]; then
- echo "ERROR: Could not flush nat postrouting" 1>&2
+ echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+<<<<<<< HEAD
+ test $lret -eq 0 && echo "PASS: $family IP masquerade for ns2"
+=======
+ test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2"
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
return $lret
}
test_redirect6()
{
+ local family=$1
local lret=0
ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
# add redirect rule
ip netns exec ns0 nft -f - <<EOF
-table ip6 nat {
+table $family nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
}
}
EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family redirect hook"
+ return $ksft_skip
+ fi
+
ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+ echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect"
lret=1
fi
fi
done
- ip netns exec ns0 nft delete table ip6 nat
+ ip netns exec ns0 nft delete table $family nat
if [ $? -ne 0 ]; then
- echo "ERROR: Could not delete ip6 nat table" 1>&2
+ echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+ test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2"
return $lret
}
test_redirect()
{
+ local family=$1
local lret=0
ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
# add redirect rule
ip netns exec ns0 nft -f - <<EOF
-table ip nat {
+table $family nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
}
}
EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family redirect hook"
+ return $ksft_skip
+ fi
+
ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+ echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect"
lret=1
fi
fi
done
- ip netns exec ns0 nft delete table ip nat
+ ip netns exec ns0 nft delete table $family nat
if [ $? -ne 0 ]; then
- echo "ERROR: Could not delete nat table" 1>&2
+ echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+ test $lret -eq 0 && echo "PASS: $family IP redirection for ns2"
return $lret
}
fi
reset_counters
-test_local_dnat
-test_local_dnat6
+test_local_dnat ip
+test_local_dnat6 ip6
+reset_counters
+$test_inet_nat && test_local_dnat inet
+$test_inet_nat && test_local_dnat6 inet
reset_counters
-test_masquerade
-test_masquerade6
+<<<<<<< HEAD
+test_masquerade ip
+test_masquerade6 ip6
+reset_counters
+$test_inet_nat && test_masquerade inet
+$test_inet_nat && test_masquerade6 inet
+=======
+test_masquerade ""
+test_masquerade6 ""
reset_counters
-test_redirect
-test_redirect6
+test_masquerade "fully-random"
+test_masquerade6 "fully-random"
+>>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1
+
+reset_counters
+test_redirect ip
+test_redirect6 ip6
+reset_counters
+$test_inet_nat && test_redirect inet
+$test_inet_nat && test_redirect6 inet
for i in 0 1 2; do ip netns del ns$i;done
ph.p_offset = 0;
ph.p_vaddr = VADDR;
ph.p_paddr = 0;
- ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload);
- ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload);
+ ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
+ ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
ph.p_align = 4096;
fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
int main(void)
{
- const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
-#ifdef __arm__
- unsigned long va = 2 * PAGE_SIZE;
-#else
- unsigned long va = 0;
-#endif
+ const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ const unsigned long va_max = 1UL << 32;
+ unsigned long va;
void *p;
int fd;
unsigned long a, b;
if (fd == -1)
return 1;
- p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
- if (p == MAP_FAILED) {
- if (errno == EPERM)
- return 4;
+ for (va = 0; va < va_max; va += PAGE_SIZE) {
+ p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+ if (p == (void *)va)
+ break;
+ }
+ if (va == va_max) {
+ fprintf(stderr, "error: mmap doesn't like you\n");
return 1;
}
"$TC qdisc del dev $DEV1 ingress"
]
},
+ {
+ "id": "2638",
+ "name": "Add matchall and try to get it",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
+ ],
+ "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
+ ]
+ },
{
"id": "d052",
"name": "Add 1M filters with the same action",
"$TC qdisc del dev $DEV2 ingress",
"/bin/rm $BATCH_FILE"
]
+ },
+ {
+ "id": "4cbd",
+ "name": "Try to add filter with duplicate key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress",
+ "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
}
]
TPM2_CC_FLUSH_CONTEXT = 0x0165
TPM2_CC_START_AUTH_SESSION = 0x0176
TPM2_CC_GET_CAPABILITY = 0x017A
+TPM2_CC_GET_RANDOM = 0x017B
TPM2_CC_PCR_READ = 0x017E
TPM2_CC_POLICY_PCR = 0x017F
TPM2_CC_PCR_EXTEND = 0x0182
self.flags = flags
if (self.flags & Client.FLAG_SPACE) == 0:
- self.tpm = open('/dev/tpm0', 'r+b')
+ self.tpm = open('/dev/tpm0', 'r+b', buffering=0)
else:
- self.tpm = open('/dev/tpmrm0', 'r+b')
+ self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0)
def close(self):
self.tpm.close()
pass
self.assertEqual(rejected, True)
+    def test_read_partial_resp(self):
+        """Read one GetRandom response in two read() calls.
+
+        Sends a TPM2_CC_GET_RANDOM command asking for 0x20 bytes, reads the
+        first 10 bytes, then reads whatever is left.  Any I/O error is
+        allowed to propagate so that the test reports the real failure
+        instead of a NameError on an unassigned variable.
+        """
+        fmt = '>HIIH'
+        cmd = struct.pack(fmt,
+                          tpm2.TPM2_ST_NO_SESSIONS,
+                          struct.calcsize(fmt),
+                          tpm2.TPM2_CC_GET_RANDOM,
+                          0x20)
+        self.client.tpm.write(cmd)
+        hdr = self.client.tpm.read(10)
+        # Bytes 2..5 of the first chunk are decoded as a big-endian 32-bit
+        # size, which must cover the whole response.
+        sz = struct.unpack('>I', hdr[2:6])[0]
+        rsp = self.client.tpm.read()
+        self.assertEqual(sz, 10 + 2 + 32)
+        self.assertEqual(len(rsp), 2 + 32)
+
+    def test_read_partial_overwrite(self):
+        """Send a new command while a response is only partially read.
+
+        After reading 15 bytes of the first GetRandom response a second
+        command is written; the following read() is expected to return one
+        complete response (10 + 2 + 32 bytes).  I/O errors propagate so the
+        test reports the real failure instead of a NameError.
+        """
+        fmt = '>HIIH'
+        cmd = struct.pack(fmt,
+                          tpm2.TPM2_ST_NO_SESSIONS,
+                          struct.calcsize(fmt),
+                          tpm2.TPM2_CC_GET_RANDOM,
+                          0x20)
+        self.client.tpm.write(cmd)
+        # Read part of the response
+        rsp1 = self.client.tpm.read(15)
+
+        # Send a new cmd
+        self.client.tpm.write(cmd)
+
+        # Read the whole response
+        rsp2 = self.client.tpm.read()
+        self.assertEqual(len(rsp1), 15)
+        self.assertEqual(len(rsp2), 10 + 2 + 32)
+
+    def test_send_two_cmds(self):
+        """Writing a second command before reading the first must fail.
+
+        The second write() is expected to raise IOError; the pending
+        response is then read and the test passes.  Exceptions of any other
+        type are no longer swallowed and show up as test errors.
+        """
+        rejected = False
+        try:
+            fmt = '>HIIH'
+            cmd = struct.pack(fmt,
+                              tpm2.TPM2_ST_NO_SESSIONS,
+                              struct.calcsize(fmt),
+                              tpm2.TPM2_CC_GET_RANDOM,
+                              0x20)
+            self.client.tpm.write(cmd)
+
+            # expect the second one to raise -EBUSY error
+            self.client.tpm.write(cmd)
+            self.client.tpm.read()
+
+        except IOError:
+            # read the response
+            self.client.tpm.read()
+            rejected = True
+        self.assertEqual(rejected, True)
+
class SpaceTest(unittest.TestCase):
def setUp(self):
logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG)
{
struct kvm_kernel_irq_routing_entry *ei;
int r;
+ u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and non-irqchip routing.
*/
- hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+ hlist_for_each_entry(ei, &rt->map[gsi], link)
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return -EINVAL;
- e->gsi = ue->gsi;
+ e->gsi = gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+ int type;
int ret;
if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENODEV;
- ops = kvm_device_ops_table[cd->type];
+ type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table));
+ ops = kvm_device_ops_table[type];
if (ops == NULL)
return -ENODEV;
dev->kvm = kvm;
mutex_lock(&kvm->lock);
- ret = ops->create(dev, cd->type);
+ ret = ops->create(dev, type);
if (ret < 0) {
mutex_unlock(&kvm->lock);
kfree(dev);